Files
ModelHub XC 15c4f36648 初始化项目,由ModelHub XC社区提供模型
Model: W-61/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200
Source: Original Platform
2026-04-24 11:32:05 +08:00

478 lines
1.3 MiB

{"epoch": 0.0, "step": 1, "batch_size": 128, "mean": -0.024725839495658875, "std": 0.6267476081848145, "min": -2.3379974365234375, "p10": -0.7331054687499999, "median": -0.01795196533203125, "p90": 0.6436386108398433, "max": 1.66827392578125, "pos_frac": 0.4921875, "sample": [0.1674652099609375, 0.092498779296875, 0.1595458984375, -0.044403076171875, -0.722320556640625, -0.1856689453125, -0.7859954833984375, -0.54620361328125, 0.446380615234375, -1.214935302734375, 0.2291259765625, 1.66827392578125, -0.6990966796875, -1.551727294921875, -0.18653106689453125, 0.19101333618164062, -0.7856903076171875, -1.133636474609375, -2.3379974365234375, -0.582763671875, -0.758270263671875, 0.43988037109375, -0.9514541625976562, -0.09796142578125, -0.0359039306640625, 1.14263916015625, -0.6820068359375, -0.6137542724609375, -0.339508056640625, -0.3977317810058594, -1.02557373046875, 0.550262451171875, 1.4956512451171875, 0.2601776123046875, -0.199493408203125, 0.20355224609375, -0.29620361328125, 0.318603515625, -0.0998687744140625, -0.62744140625, -0.710540771484375, -0.70892333984375, 0.5078010559082031, 0.137725830078125, -0.385894775390625, -0.2215576171875, 0.587554931640625, 0.01473236083984375, -0.407562255859375, 0.3812713623046875, -0.08927154541015625, 0.34177398681640625, -0.6257400512695312, -0.23140716552734375, 0.3023681640625, 1.049346923828125, 0.59326171875, 0.4530029296875, 0.039764404296875, -0.214813232421875, -0.042972564697265625, 0.3683624267578125, -0.143951416015625, 0.9142074584960938, -0.645050048828125, 0.5536956787109375, 1.0389938354492188, -0.570037841796875, 0.5283050537109375, 0.406494140625, -0.2256622314453125, 0.79986572265625, -1.50103759765625, -0.093780517578125, -0.199462890625, 0.131805419921875, 0.3006134033203125, 0.037418365478515625, -0.1775360107421875, -0.0821533203125, 0.3060302734375, 0.201385498046875, 1.053680419921875, 0.5845870971679688, -0.2509117126464844, 0.964447021484375, -0.245361328125, 0.7611846923828125, 0.38768768310546875, 0.952484130859375, 0.9595947265625, 0.289459228515625, -0.1939697265625, 0.131011962890625, -1.03314208984375, 0.1519756317138672, 0.210723876953125, 0.2867774963378906, 0.47210693359375, -0.19388961791992188, 0.8321533203125, -0.26869964599609375, -0.19608306884765625, -0.14408111572265625, 0.47847747802734375, 0.55963134765625, -0.65667724609375, 0.0279388427734375, 0.2209625244140625, -0.1755523681640625, -1.31988525390625, -0.29390716552734375, -1.07904052734375, 0.0, -0.0983428955078125, 0.143341064453125, 0.4720001220703125, 0.1136016845703125, 0.24009323120117188, -0.6872100830078125, 0.4634552001953125, 0.46490478515625, -0.1591033935546875, -0.5848388671875, -0.14737701416015625, 0.253753662109375, 0.50762939453125, -0.6018524169921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000001.npy"}
{"epoch": 0.0020942408376963353, "step": 2, "batch_size": 128, "mean": 0.012230798602104187, "std": 0.6654300093650818, "min": -1.97247314453125, "p10": -0.8530120849609375, "median": 0.042728424072265625, "p90": 0.8162986755371093, "max": 2.1260986328125, "pos_frac": 0.515625, "sample": [-0.946807861328125, 0.5867156982421875, -0.07851600646972656, 0.3317985534667969, -0.14400482177734375, -1.139923095703125, -0.21550750732421875, -0.4138031005859375, 0.31939697265625, 0.5141754150390625, 0.603607177734375, -0.423095703125, -0.39898681640625, 0.06365966796875, 1.42681884765625, -1.2214508056640625, 0.069488525390625, 0.7487945556640625, -1.0627593994140625, 0.75299072265625, -0.0111846923828125, 0.564971923828125, -0.295745849609375, -0.191375732421875, 0.41588592529296875, -0.06989288330078125, -0.0491790771484375, 0.23805999755859375, 0.7451171875, 1.089202880859375, 0.103302001953125, -0.5128173828125, -0.2125396728515625, -0.793853759765625, -0.06198883056640625, 0.03464508056640625, 1.39581298828125, -0.739013671875, -1.128082275390625, 0.9674072265625, 0.29950523376464844, 0.19829559326171875, -0.7109375, 0.547821044921875, 2.1260986328125, 0.19042205810546875, 0.0, -0.1408672332763672, -0.244537353515625, 0.9249114990234375, -0.5446319580078125, -0.7351150512695312, 0.564849853515625, -0.9248046875, 0.498077392578125, -0.6666107177734375, 0.6832046508789062, 0.13818359375, 1.1375579833984375, 0.3428535461425781, 0.10821533203125, -0.3601531982421875, -0.32855224609375, -0.611328125, -1.2871551513671875, 0.349517822265625, -0.5836944580078125, -0.7088851928710938, 0.9993896484375, 0.0679931640625, 0.8251266479492188, 0.114471435546875, -1.3863983154296875, -0.48331451416015625, -0.283416748046875, 0.917938232421875, 0.07848358154296875, -0.04154014587402344, -1.97247314453125, -0.28961181640625, 0.09808349609375, 0.936187744140625, -0.0081024169921875, 0.050811767578125, 0.482147216796875, 0.0656280517578125, 0.35321044921875, 0.188446044921875, 0.33001708984375, 0.06795883178710938, -0.851043701171875, 0.13287353515625, 0.48419189453125, -1.094207763671875, -0.7486114501953125, -0.090087890625, 0.1746826171875, -0.1995086669921875, -0.4486236572265625, 0.1865692138671875, -0.85760498046875, -0.9853515625, 0.0034122467041015625, -0.2874755859375, 0.440765380859375, -0.3415374755859375, -0.322021484375, -0.1216583251953125, 0.4712066650390625, 0.6232757568359375, -0.58184814453125, 0.06168365478515625, 0.619873046875, 0.8125152587890625, -0.04132080078125, 0.397979736328125, -0.12225723266601562, 1.53125, 0.1341552734375, 0.465179443359375, -0.1810455322265625, 1.6507568359375, -0.05999755859375, 0.15135574340820312, -0.925384521484375, -0.35611724853515625, -0.729736328125, 0.33465576171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000002.npy"}
{"epoch": 0.004188481675392671, "step": 3, "batch_size": 128, "mean": 0.06707523763179779, "std": 0.6671504378318787, "min": -1.3936767578125, "p10": -0.7499053955078124, "median": 0.044114112854003906, "p90": 0.9809661865234374, "max": 1.90057373046875, "pos_frac": 0.515625, "sample": [0.17608642578125, -0.041217803955078125, 0.16083526611328125, -0.14043045043945312, -0.792694091796875, 0.609954833984375, 0.67657470703125, 1.1666793823242188, 0.36065673828125, 0.0233154296875, 0.2574615478515625, -0.238311767578125, 0.44061279296875, 1.69036865234375, 1.90057373046875, 0.19226837158203125, -0.5130844116210938, -0.2127532958984375, -0.56378173828125, -1.09918212890625, -0.3571510314941406, -0.305419921875, 0.23863983154296875, -0.05837440490722656, -0.5650787353515625, 0.471405029296875, 0.8632774353027344, -0.1381664276123047, 0.2818756103515625, 0.3682861328125, -0.54669189453125, -0.0704803466796875, -0.08756256103515625, 1.3969573974609375, 0.1868896484375, 0.907318115234375, -0.1349029541015625, -0.236083984375, -0.16357421875, 0.316131591796875, 0.63934326171875, -1.025482177734375, 1.51904296875, 0.04138374328613281, -0.3878173828125, -0.0460052490234375, -1.3936767578125, 1.121826171875, 0.2611656188964844, -0.1909008026123047, -1.2186279296875, -0.62042236328125, 0.21784210205078125, 0.1390228271484375, 0.3183441162109375, 0.9619140625, -0.459625244140625, -1.067352294921875, -0.943939208984375, 0.23040771484375, 0.8938140869140625, -0.3580474853515625, -0.2611083984375, 0.046844482421875, 0.0, 0.3394775390625, 0.709228515625, -0.530548095703125, 0.5001220703125, 0.177001953125, 0.928436279296875, -0.368011474609375, -0.162384033203125, 0.61480712890625, 1.2655029296875, 0.142822265625, -0.8278884887695312, 1.2615966796875, 0.06855010986328125, 0.35419464111328125, -1.168548583984375, -0.568603515625, -0.3735160827636719, 1.2392578125, 0.43505859375, 0.398529052734375, -0.22857666015625, -0.1320648193359375, 0.596099853515625, -0.2103729248046875, -0.09090995788574219, -0.6985931396484375, -0.97021484375, 0.10439682006835938, -0.6340255737304688, -0.44976043701171875, 0.48211669921875, -0.518096923828125, -0.145050048828125, -0.7315673828125, 1.067291259765625, -0.2421875, 0.6634521484375, 1.6376190185546875, 0.163238525390625, 0.27728271484375, 0.30413055419921875, -0.12554168701171875, 0.0, 0.869903564453125, 0.438629150390625, -1.071502685546875, 0.2490234375, -1.25311279296875, 0.28143310546875, 1.025421142578125, 0.3310089111328125, 0.4352569580078125, -0.807159423828125, -0.5949554443359375, -0.1833038330078125, -0.61834716796875, -0.2809867858886719, -0.4860992431640625, -0.6274642944335938, 1.22265625, 0.13614654541015625, 0.12615966796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000003.npy"}
{"epoch": 0.0062827225130890054, "step": 4, "batch_size": 128, "mean": 0.013702064752578735, "std": 0.7619850635528564, "min": -2.154541015625, "p10": -1.0577178955078124, "median": 0.0645456314086914, "p90": 0.8747924804687499, "max": 1.765045166015625, "pos_frac": 0.5546875, "sample": [0.3685455322265625, 0.6129302978515625, 0.06232261657714844, 0.5618896484375, -1.943756103515625, -0.164947509765625, 0.57012939453125, -1.17236328125, 0.19854736328125, -1.047637939453125, 0.650115966796875, 1.651123046875, -0.90142822265625, 0.531005859375, 0.77117919921875, 0.07073974609375, 1.044677734375, 0.59637451171875, 0.019622802734375, 0.58599853515625, 0.52288818359375, -0.09652328491210938, -0.755767822265625, 0.19622802734375, 0.2032012939453125, 0.6183319091796875, 0.94122314453125, -1.277191162109375, -0.056304931640625, -0.78533935546875, -0.8243865966796875, -1.177734375, 0.136505126953125, -0.18246078491210938, -0.100982666015625, -0.1177978515625, 0.485626220703125, 1.253631591796875, 0.49908447265625, -0.060333251953125, -2.154541015625, -0.55859375, 0.151641845703125, -0.22540283203125, -0.903656005859375, -0.511962890625, -0.0589447021484375, -0.15126419067382812, 0.113067626953125, 0.322052001953125, -0.15643310546875, -0.611358642578125, 0.548309326171875, -0.91265869140625, 0.0375213623046875, 0.248138427734375, 0.4666595458984375, 0.8936767578125, 1.267578125, 0.647430419921875, 1.5118408203125, 0.700347900390625, -1.1243896484375, -0.6410980224609375, 0.4715576171875, -0.41961669921875, 0.7440185546875, 0.30462646484375, 0.18762969970703125, -0.11767578125, 0.0631256103515625, -1.199981689453125, 1.765045166015625, 0.0327606201171875, 0.221771240234375, 0.17115402221679688, -0.4131622314453125, -1.288787841796875, -1.08123779296875, -0.93560791015625, 0.051910400390625, 0.32373046875, -1.197998046875, 1.184295654296875, -0.77764892578125, 1.324249267578125, -0.644927978515625, 0.156707763671875, -0.03948974609375, 0.08709716796875, -0.46784210205078125, -0.71453857421875, 0.4312591552734375, -1.66802978515625, -1.791412353515625, 0.70440673828125, 0.112762451171875, 0.856292724609375, -0.08621597290039062, -0.26934814453125, 0.1016845703125, 0.250213623046875, 0.42340087890625, 0.01141357421875, 1.45050048828125, -0.1222381591796875, 0.515533447265625, 0.06596565246582031, -0.31219482421875, -0.2750091552734375, -0.0189056396484375, -0.7247314453125, -0.2591552734375, -1.552581787109375, 1.55078125, 0.9063644409179688, 0.426513671875, -0.2978515625, 0.7251052856445312, -0.2125244140625, 0.42816162109375, -0.625732421875, 0.168731689453125, 0.86669921875, -0.046783447265625, 0.567047119140625, 0.86309814453125, -0.58544921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000004.npy"}
{"epoch": 0.008376963350785341, "step": 5, "batch_size": 128, "mean": -0.05373336374759674, "std": 0.8789262771606445, "min": -2.1412353515625, "p10": -1.2022171020507812, "median": -0.10123062133789062, "p90": 1.1111351013183592, "max": 2.13671875, "pos_frac": 0.4375, "sample": [1.1321182250976562, -1.606689453125, 0.0438385009765625, 1.55908203125, 1.91571044921875, -1.75311279296875, -0.2101287841796875, 0.027740478515625, -0.0376129150390625, 0.197601318359375, -1.3685302734375, 0.39996337890625, -0.05587196350097656, 2.06964111328125, 0.962158203125, -0.934967041015625, -0.35125732421875, 0.24069976806640625, -0.436065673828125, 1.499786376953125, 0.288848876953125, -0.30272674560546875, -0.22238922119140625, -1.573577880859375, 1.5235595703125, -0.9227294921875, -0.756256103515625, 0.5316162109375, -0.9696197509765625, -0.344573974609375, 0.6262054443359375, -0.3827972412109375, -0.08631134033203125, 0.15540313720703125, -0.685089111328125, -0.84747314453125, 2.13671875, -0.1824493408203125, 0.501129150390625, -0.8516845703125, -0.05029296875, 0.07470703125, 0.3027992248535156, -1.20269775390625, -1.2020111083984375, 0.2788963317871094, 0.1660919189453125, 1.776153564453125, 1.2105712890625, -0.042327880859375, 0.89324951171875, -1.684051513671875, 0.008056640625, -0.29205322265625, -1.006866455078125, -0.229461669921875, 0.354736328125, -0.900848388671875, -1.81256103515625, -0.4173583984375, -0.29470062255859375, -1.342376708984375, -1.134521484375, -0.177642822265625, 0.3446197509765625, -0.733184814453125, -0.005462646484375, 0.428680419921875, 0.720611572265625, -0.1923828125, -0.49822998046875, 1.102142333984375, 1.15496826171875, 2.120391845703125, -1.376495361328125, -0.2895622253417969, -0.01214599609375, -0.28509521484375, 0.137359619140625, -0.713714599609375, -0.68243408203125, -0.6683425903320312, 0.04674720764160156, 0.73773193359375, 0.15740966796875, -1.137115478515625, 0.22412109375, -0.15399551391601562, 0.653167724609375, 0.6015167236328125, -0.621612548828125, 0.018861770629882812, -0.4150390625, 0.01554107666015625, -0.11614990234375, -0.6917533874511719, 1.0604934692382812, 0.470306396484375, -0.22986602783203125, -2.1412353515625, -0.8707275390625, 0.2887554168701172, 1.018951416015625, -0.21160888671875, 0.2052764892578125, -0.1468639373779297, -1.217132568359375, -0.568695068359375, 0.44378662109375, 0.819427490234375, -0.2733306884765625, -0.034515380859375, 0.027191162109375, -1.217529296875, -0.327728271484375, -0.24785614013671875, 0.8366241455078125, 0.706146240234375, -0.484710693359375, 0.8523178100585938, 1.223968505859375, -0.71795654296875, 0.783721923828125, -0.885101318359375, 1.9776611328125, -0.18267822265625, -0.4687957763671875, -1.442718505859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000005.npy"}
{"epoch": 0.010471204188481676, "step": 6, "batch_size": 128, "mean": -0.03836575150489807, "std": 0.673832356929779, "min": -1.708160400390625, "p10": -0.8044075012207031, "median": -0.0702972412109375, "p90": 0.8066131591796873, "max": 2.19482421875, "pos_frac": 0.40625, "sample": [-0.501922607421875, -1.708160400390625, -0.520355224609375, 0.48492431640625, 0.172515869140625, -0.3249053955078125, 0.31499481201171875, -1.475341796875, -0.54669189453125, -0.03481864929199219, -0.05136871337890625, 0.97802734375, 0.550445556640625, 2.19482421875, 0.6775054931640625, -1.0411224365234375, -0.06915283203125, 0.0043487548828125, 0.1238861083984375, -0.5311279296875, -0.2027130126953125, -0.29604339599609375, 1.2011566162109375, -0.4012451171875, -0.1390838623046875, -0.476104736328125, 0.26619720458984375, -0.49053192138671875, -0.016693115234375, 0.44776153564453125, -0.19593048095703125, -0.0877685546875, -0.02874755859375, -0.029449462890625, 1.198822021484375, -0.07532501220703125, 0.241943359375, -0.13568115234375, 0.2844390869140625, 0.66314697265625, -0.863067626953125, -0.07836532592773438, -0.7387847900390625, 0.8831329345703125, -0.66876220703125, 1.45184326171875, -0.700164794921875, 0.00882720947265625, -1.0048828125, 0.39144134521484375, 0.926971435546875, -0.1982421875, -1.18377685546875, 1.981475830078125, 1.2670135498046875, 0.305084228515625, -1.142669677734375, -0.8428573608398438, 0.847991943359375, -0.1628570556640625, -0.37420654296875, -1.360198974609375, -0.029449462890625, -0.0844268798828125, -0.43402099609375, -0.4206390380859375, 0.0592803955078125, -0.1876220703125, 0.0140838623046875, -0.94683837890625, -0.175872802734375, -0.45721435546875, 0.3829193115234375, 0.104095458984375, 0.208038330078125, -0.241424560546875, -1.21087646484375, -0.4491119384765625, -0.011199951171875, 1.550048828125, 1.019256591796875, 0.33221435546875, 0.05621337890625, -0.762664794921875, 0.2614288330078125, -0.2470703125, -0.80328369140625, 0.2164306640625, -0.29496002197265625, -0.4710693359375, -0.620147705078125, -0.779876708984375, -0.04906272888183594, 0.3477783203125, -0.23838043212890625, 0.2836456298828125, -0.308990478515625, -0.413604736328125, -0.781463623046875, 0.21067047119140625, 0.17226409912109375, 0.48431396484375, 0.27083587646484375, 1.781829833984375, -0.254547119140625, 0.16260719299316406, -0.5652847290039062, 0.44439697265625, -0.1435070037841797, -0.004364013671875, -0.063568115234375, -0.5509796142578125, -0.07625579833984375, 0.7128524780273438, 0.0, -0.071441650390625, -0.8070297241210938, -0.30912017822265625, 0.455078125, -0.3008308410644531, -0.10323333740234375, -1.523956298828125, 0.78887939453125, 0.55621337890625, 0.12030029296875, 0.14263916015625, -0.3474998474121094, 0.3221588134765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000006.npy"}
{"epoch": 0.012565445026178011, "step": 7, "batch_size": 128, "mean": 0.014413803815841675, "std": 0.6349131464958191, "min": -1.924346923828125, "p10": -0.7802810668945311, "median": 0.0499420166015625, "p90": 0.7347030639648436, "max": 1.97369384765625, "pos_frac": 0.5390625, "sample": [-0.489776611328125, 0.355224609375, -1.844573974609375, -0.9383544921875, 0.03635406494140625, 0.544677734375, 1.01983642578125, -0.09686279296875, -0.1935577392578125, 0.405120849609375, 0.0, 0.10468864440917969, -0.14200973510742188, -0.557769775390625, -1.098114013671875, 0.184722900390625, 0.320037841796875, -0.12103271484375, 0.438018798828125, 0.198394775390625, 0.016178131103515625, -0.06437301635742188, -0.16025543212890625, -0.330474853515625, -0.829681396484375, -0.054286956787109375, -0.9276275634765625, -0.214630126953125, 1.092041015625, 0.681243896484375, 0.8467254638671875, 0.4557037353515625, 0.6646270751953125, -0.53167724609375, 0.1620330810546875, 0.598541259765625, -0.24041748046875, 0.543548583984375, -0.598388671875, 1.1241989135742188, -0.026275634765625, -0.66748046875, 0.51043701171875, 0.7603912353515625, -1.924346923828125, -1.20062255859375, -1.35284423828125, 0.0753021240234375, 1.97369384765625, 0.079925537109375, 0.1298675537109375, 0.2862548828125, -0.3413238525390625, -0.0486602783203125, 0.037994384765625, -0.3216552734375, -0.57763671875, -0.630462646484375, -0.7012939453125, 0.152587890625, 0.412322998046875, 0.783447265625, 0.087493896484375, -0.5125732421875, 0.319091796875, 0.5249786376953125, -0.3896942138671875, 0.521942138671875, -0.7271728515625, -0.231964111328125, -0.87139892578125, -0.372894287109375, -1.1309814453125, -1.192108154296875, 0.61004638671875, -0.3165283203125, 0.0127105712890625, -0.91290283203125, 0.4027099609375, 0.3877410888671875, 0.2066516876220703, 0.79510498046875, 0.15897369384765625, 0.4796142578125, -0.1851654052734375, 0.362213134765625, -1.09637451171875, 0.129638671875, -0.621826171875, 0.08396530151367188, -0.170257568359375, 0.7890777587890625, 0.72369384765625, -0.36431884765625, 0.6571044921875, -0.15396690368652344, -0.7352294921875, 0.42043304443359375, -0.1917266845703125, -0.56884765625, 0.330352783203125, 0.95965576171875, -0.2183380126953125, 0.08220291137695312, 0.63946533203125, 0.2096710205078125, 0.03607940673828125, -0.522918701171875, 0.4836082458496094, 0.248992919921875, -0.03983879089355469, 1.1219482421875, 0.4421806335449219, 0.601776123046875, 0.0618896484375, 0.924713134765625, 0.625518798828125, 0.5103759765625, -0.2484588623046875, 0.5441970825195312, -0.33553314208984375, 0.19561004638671875, -0.124053955078125, 1.4935455322265625, -0.057586669921875, -0.464111328125, -0.7591094970703125, 0.408203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000007.npy"}
{"epoch": 0.014659685863874346, "step": 8, "batch_size": 128, "mean": -0.015087828040122986, "std": 0.77231764793396, "min": -3.653350830078125, "p10": -0.736712646484375, "median": 0.031215667724609375, "p90": 0.7106567382812498, "max": 2.30535888671875, "pos_frac": 0.5546875, "sample": [0.2801055908203125, 0.03472900390625, 0.3503875732421875, 1.07550048828125, -0.395111083984375, 0.68121337890625, -0.4517059326171875, -1.29229736328125, 0.204833984375, 0.30316162109375, -0.1358509063720703, 0.608642578125, 1.2335205078125, 0.224090576171875, -1.7040252685546875, -0.1981964111328125, -0.4517822265625, 0.03204345703125, -0.08056640625, -0.5304107666015625, 1.73748779296875, 0.62371826171875, -0.4112968444824219, -0.0203857421875, 0.3453197479248047, -0.51226806640625, 0.30584716796875, 0.1328716278076172, 0.02996826171875, -0.64617919921875, -1.2493896484375, 0.09900665283203125, 0.01959228515625, 0.44921875, -0.7047119140625, -0.4536476135253906, 0.3335113525390625, 0.49932861328125, -0.558135986328125, -0.2839813232421875, 0.521728515625, -0.07281494140625, 0.0198516845703125, 0.12396240234375, -0.275115966796875, -0.5115280151367188, 0.0517120361328125, -0.158599853515625, -0.72552490234375, -2.13958740234375, -0.56707763671875, 1.1270751953125, 0.2751884460449219, -0.06719970703125, 0.040313720703125, -0.620513916015625, 0.6489105224609375, 0.09193992614746094, 0.4883270263671875, 0.037750244140625, -0.06048583984375, 1.022857666015625, 0.03038787841796875, 0.198638916015625, -0.3489551544189453, 0.77935791015625, 0.11342239379882812, -0.28131103515625, -1.370849609375, -1.556976318359375, 0.474700927734375, 0.011993408203125, -0.7999114990234375, 0.0655670166015625, -0.607147216796875, 0.548614501953125, 1.681671142578125, 0.5985107421875, -0.6326904296875, -0.4556884765625, -0.6531219482421875, -0.1206512451171875, 0.11767578125, 0.589080810546875, 0.0266876220703125, -0.38935089111328125, 0.392822265625, 0.07004547119140625, -0.0282135009765625, 1.77532958984375, 1.56292724609375, -0.7628173828125, 0.50054931640625, -0.263427734375, 0.02325439453125, -0.440582275390625, 0.29736328125, 0.0478515625, 0.3269500732421875, 0.32065582275390625, 0.4356689453125, -0.8914642333984375, -0.1581897735595703, -0.035186767578125, 0.794158935546875, 2.30535888671875, 0.3541259765625, 1.0670166015625, 0.3572998046875, 0.90533447265625, -1.07373046875, -0.222412109375, -2.123565673828125, -0.237060546875, -0.12330245971679688, 0.6371307373046875, 0.05975341796875, -1.36077880859375, 0.4786033630371094, -3.653350830078125, 0.2125701904296875, 0.10839080810546875, -0.22261810302734375, -0.36746978759765625, -0.00858306884765625, 0.1313018798828125, -0.16156005859375, 0.24362754821777344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000008.npy"}
{"epoch": 0.016753926701570682, "step": 9, "batch_size": 128, "mean": -0.025261014699935913, "std": 0.7742035388946533, "min": -2.83306884765625, "p10": -0.9735122680664062, "median": 0.0, "p90": 0.8361602783203124, "max": 1.9651336669921875, "pos_frac": 0.484375, "sample": [0.6138763427734375, 0.86767578125, -0.529205322265625, -0.4192695617675781, 0.38634490966796875, -0.25212860107421875, -0.9782867431640625, -0.5019912719726562, -0.49713134765625, -2.049468994140625, -0.971466064453125, -0.06209564208984375, -0.090087890625, 0.228515625, 1.117034912109375, -1.222625732421875, -0.22698974609375, -2.83306884765625, 0.5075531005859375, -0.751373291015625, 1.363037109375, -0.58709716796875, -0.1751708984375, 0.198638916015625, -1.345672607421875, -0.417755126953125, -0.455718994140625, 0.0479736328125, 1.47039794921875, -2.43597412109375, 0.22967529296875, 0.24517440795898438, -0.1314697265625, -1.286407470703125, -0.4122467041015625, -0.34717559814453125, 1.1328125, -0.2656707763671875, -0.2308807373046875, 0.170867919921875, -0.36248779296875, 0.5269775390625, 0.3929710388183594, 0.256591796875, -0.64263916015625, 0.395599365234375, -1.197265625, -0.057933807373046875, 1.44696044921875, 0.1712493896484375, 0.10205078125, -0.14721298217773438, 0.5068931579589844, 0.0, -0.125091552734375, 1.123046875, -0.1373443603515625, 0.566986083984375, 0.0, -0.3838653564453125, 0.042236328125, -0.463775634765625, 0.788330078125, -0.5611648559570312, 0.441619873046875, -0.22234344482421875, 0.115264892578125, -0.8349609375, 0.1487579345703125, -0.1559600830078125, 0.86737060546875, 0.4918670654296875, -0.5053787231445312, -0.1593475341796875, -1.647216796875, 0.6734771728515625, 0.61871337890625, -0.213104248046875, -1.633514404296875, -0.066680908203125, -0.5633544921875, -0.347991943359375, 0.21844482421875, -0.4425811767578125, -0.658538818359375, -0.10300827026367188, 0.0, 0.43212890625, 0.372314453125, 0.1669921875, -0.4652099609375, -0.3492393493652344, -0.6382369995117188, -1.48046875, 0.14162445068359375, -1.06719970703125, 0.55438232421875, -0.4002227783203125, 0.0572509765625, 0.462158203125, 0.218780517578125, 0.112884521484375, 0.689971923828125, 0.822784423828125, 1.9651336669921875, 0.47174072265625, 0.9859466552734375, -0.14251708984375, 0.74444580078125, 0.5614776611328125, 1.8287353515625, 0.2653617858886719, 1.37518310546875, 0.5738143920898438, -0.28583526611328125, -0.096466064453125, 0.48834991455078125, 0.047924041748046875, -1.42919921875, -0.41786956787109375, -0.3149566650390625, -0.00543212890625, 0.39306640625, 0.059093475341796875, 0.4148406982421875, 0.73870849609375, 1.183074951171875, 0.365478515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000009.npy"}
{"epoch": 0.018848167539267015, "step": 10, "batch_size": 128, "mean": 0.18755605816841125, "std": 0.6971707940101624, "min": -2.484954833984375, "p10": -0.54337158203125, "median": 0.12350845336914062, "p90": 1.0943519592285154, "max": 2.53936767578125, "pos_frac": 0.59375, "sample": [1.456787109375, -0.07611083984375, -0.0432586669921875, 0.5823135375976562, -0.32362937927246094, 0.75384521484375, -0.077484130859375, 0.202911376953125, 0.560272216796875, 0.15338134765625, 0.93505859375, 0.2279205322265625, -0.197784423828125, -0.018646240234375, -0.029706954956054688, 0.3616790771484375, -0.19208526611328125, 1.660186767578125, -0.33172607421875, 0.12066650390625, 1.0742721557617188, 1.538818359375, -0.7711868286132812, -1.1455078125, 0.30825042724609375, 0.564117431640625, 0.15618896484375, -0.005340576171875, 0.1088409423828125, 0.0, -0.37249755859375, -1.15966796875, 0.13312530517578125, 0.6302490234375, -0.014926910400390625, 1.73382568359375, 0.66912841796875, -0.52032470703125, 1.344635009765625, 1.7472686767578125, 2.53936767578125, 0.522247314453125, 0.087127685546875, 1.034576416015625, 0.0692138671875, 0.4143829345703125, 0.1079864501953125, -0.685333251953125, 0.1602783203125, 1.4912109375, -0.02024078369140625, 0.2841796875, -0.394378662109375, 0.261016845703125, 0.11145782470703125, -0.9587554931640625, 0.07760810852050781, -0.060089111328125, 0.768310546875, 0.030914306640625, 0.05104827880859375, 0.079071044921875, 0.19207000732421875, -2.484954833984375, 0.2461700439453125, 1.141204833984375, 0.44866943359375, -0.05733299255371094, 1.56463623046875, -0.009761810302734375, -0.12661361694335938, 0.263092041015625, 1.5885009765625, 0.33014488220214844, -0.022705078125, -0.3971748352050781, 0.2959442138671875, -0.12127685546875, 0.24866867065429688, -0.09747314453125, -0.6526336669921875, -0.0308074951171875, -0.3023529052734375, 0.12635040283203125, 0.3165283203125, -0.5396270751953125, 0.2627105712890625, -0.511138916015625, 0.2427978515625, -0.935333251953125, 0.3768310546875, -0.3176536560058594, -0.4948883056640625, -0.16298675537109375, 0.13555908203125, 0.2777252197265625, -0.3820343017578125, 0.7828369140625, 1.057464599609375, 0.2786674499511719, 0.680328369140625, 0.2669525146484375, 0.011051177978515625, -0.5521087646484375, 0.220245361328125, 0.28985595703125, -0.3666534423828125, -0.616790771484375, -0.7273406982421875, 0.7859725952148438, -0.669952392578125, -0.013866424560546875, -0.414093017578125, 0.361419677734375, 0.49127197265625, 0.17156982421875, 0.0270233154296875, 0.642822265625, -0.00555419921875, -0.30017662048339844, 0.451904296875, 0.51873779296875, -0.311737060546875, -0.82391357421875, 2.145355224609375, -0.400482177734375, 0.596038818359375, 1.304412841796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000010.npy"}
{"epoch": 0.020942408376963352, "step": 11, "batch_size": 128, "mean": -0.0582287460565567, "std": 0.7042607069015503, "min": -1.54541015625, "p10": -0.8844940185546875, "median": -0.14947891235351562, "p90": 0.7487205505371093, "max": 2.2657470703125, "pos_frac": 0.4140625, "sample": [1.40350341796875, 0.548095703125, 0.444610595703125, -0.179443359375, 0.0176849365234375, -0.7635498046875, -1.54541015625, -0.37445068359375, -0.316680908203125, -0.20302772521972656, -0.972808837890625, 0.7145767211914062, -0.64288330078125, -0.56396484375, 0.60614013671875, 0.37103271484375, 1.3787841796875, -0.3751811981201172, -0.1457672119140625, -0.2484416961669922, -0.059356689453125, 0.33917236328125, -0.1694812774658203, 0.010274887084960938, -0.08182144165039062, -0.138763427734375, -0.4939537048339844, 0.215606689453125, 0.081024169921875, 0.7429275512695312, -0.4830322265625, -0.367919921875, -0.191864013671875, -0.4811859130859375, -0.01776123046875, 0.910736083984375, 0.37579345703125, -0.22280502319335938, -0.5494537353515625, -0.29276275634765625, -0.21448326110839844, 0.686676025390625, -1.37982177734375, 1.2913818359375, -0.694061279296875, 1.25830078125, 0.305694580078125, 1.61029052734375, -0.48114013671875, -0.22340774536132812, -1.23162841796875, -0.305572509765625, -0.16247940063476562, 0.240966796875, 0.633575439453125, -0.720245361328125, -0.3558502197265625, 0.10903739929199219, 0.1638641357421875, 0.094818115234375, 0.2332611083984375, -0.145263671875, -0.866607666015625, -0.214263916015625, 2.2657470703125, 0.317962646484375, 0.709136962890625, -0.14144134521484375, 0.6721649169921875, -0.77398681640625, -0.036346435546875, -0.171173095703125, -0.674896240234375, 0.4478607177734375, -0.7308502197265625, 0.198699951171875, 0.94073486328125, -0.24969482421875, -1.23431396484375, -1.4928741455078125, -0.192169189453125, -0.3768310546875, -0.12554359436035156, 0.634918212890625, 0.4749755859375, -1.40924072265625, 0.397186279296875, -1.176513671875, -0.1031494140625, -0.640472412109375, -0.2645263671875, -0.14360809326171875, 1.978271484375, 0.28265380859375, -1.3592529296875, -0.458984375, -0.2503662109375, 0.777435302734375, 0.13726806640625, -0.5481109619140625, -0.3207244873046875, 1.181243896484375, 0.25731658935546875, -0.867950439453125, 0.2357177734375, 0.1533660888671875, -0.241424560546875, -1.12933349609375, -1.0818634033203125, 0.08538818359375, 1.39410400390625, -0.923095703125, -0.15319061279296875, 0.5272750854492188, -0.778076171875, -0.38836669921875, -1.031280517578125, -0.35794830322265625, 0.6309967041015625, 0.30987548828125, -0.340087890625, 0.31548500061035156, 0.762237548828125, -0.7139892578125, 0.0524444580078125, -0.3829307556152344, -0.78955078125, 0.5491790771484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000011.npy"}
{"epoch": 0.023036649214659685, "step": 12, "batch_size": 128, "mean": 0.1287868320941925, "std": 0.844042181968689, "min": -2.45404052734375, "p10": -0.8701263427734375, "median": 0.058849334716796875, "p90": 1.0943908691406248, "max": 3.001495361328125, "pos_frac": 0.546875, "sample": [1.98516845703125, -0.1326904296875, -0.8291015625, -0.563323974609375, -1.0376663208007812, -0.10479736328125, 2.156463623046875, 0.50933837890625, -0.39925384521484375, 0.158843994140625, -0.32684326171875, 0.10980224609375, -0.88531494140625, 0.90087890625, 1.1817474365234375, -0.0990447998046875, -0.14742279052734375, 0.7389678955078125, 0.5947494506835938, 0.86688232421875, -0.1815185546875, 0.08068084716796875, -0.16815185546875, 0.7972412109375, 0.475677490234375, -0.2106475830078125, 0.315032958984375, -0.0760650634765625, -1.1628646850585938, -0.45538330078125, 1.9015960693359375, -0.192474365234375, 0.5311965942382812, 0.45575714111328125, 0.0377960205078125, 0.1547698974609375, 0.11670303344726562, 0.05987548828125, 1.17041015625, -0.2042388916015625, 0.886199951171875, 1.19970703125, 0.9454345703125, -0.6674957275390625, -0.3844451904296875, -1.050506591796875, 0.3997802734375, 0.19128799438476562, 0.5825653076171875, -0.0735321044921875, 0.08746719360351562, -0.16925048828125, -0.770599365234375, -0.39734649658203125, 0.3813323974609375, 0.09234619140625, -0.08955574035644531, -0.863616943359375, -0.017236709594726562, -0.2317047119140625, -0.15576934814453125, 0.273193359375, 0.0728759765625, 0.365081787109375, 1.73797607421875, 1.4423065185546875, -0.33817291259765625, 0.1945953369140625, 0.628265380859375, 1.02984619140625, -0.1090087890625, -0.4184112548828125, 0.6671142578125, 0.03643798828125, 0.029529571533203125, -0.4265251159667969, -0.62554931640625, 1.046356201171875, 0.5904388427734375, 1.1322021484375, 0.01416015625, -0.61474609375, 0.300201416015625, 1.2525634765625, -2.45404052734375, 0.7176513671875, -1.9001312255859375, -0.697479248046875, 0.98486328125, 2.734222412109375, 3.001495361328125, 0.1692047119140625, -0.570098876953125, 0.39312744140625, 0.63555908203125, -0.122528076171875, -0.7822265625, 0.39411163330078125, -0.083465576171875, 0.93292236328125, 0.05782318115234375, 0.2540855407714844, 1.692413330078125, -0.063873291015625, 0.42938232421875, -0.965728759765625, -1.198822021484375, 0.3545989990234375, 0.59881591796875, 1.07818603515625, 0.022247314453125, -0.136993408203125, -0.45890045166015625, -0.10407257080078125, -0.167083740234375, 0.2871246337890625, -0.4254302978515625, -1.13311767578125, -1.866363525390625, 0.9298095703125, -1.180084228515625, -0.594879150390625, -0.1497955322265625, -1.0098876953125, 0.9997100830078125, 0.27796173095703125, -1.070220947265625, 0.3760528564453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000012.npy"}
{"epoch": 0.025130890052356022, "step": 13, "batch_size": 128, "mean": 0.03348757326602936, "std": 0.7570226192474365, "min": -2.18597412109375, "p10": -0.9988220214843749, "median": 0.06764793395996094, "p90": 0.9186584472656248, "max": 2.4635009765625, "pos_frac": 0.5546875, "sample": [-0.40833282470703125, 0.2862892150878906, -0.3030853271484375, 2.1224365234375, 0.9078369140625, -0.34814453125, 1.4626235961914062, -0.0700531005859375, 0.72637939453125, -0.2161865234375, 0.741485595703125, -0.46849822998046875, 0.593963623046875, 1.021728515625, -0.3177337646484375, -0.655303955078125, -0.678558349609375, 1.144775390625, 0.445098876953125, -0.177337646484375, -0.19823074340820312, 1.103729248046875, 0.529266357421875, 0.8427734375, 0.045654296875, 0.2275543212890625, -0.06622314453125, 0.664947509765625, 0.69952392578125, 0.08213424682617188, 2.4635009765625, -0.9927978515625, 0.5842437744140625, -1.606689453125, 1.28485107421875, -1.1709442138671875, 0.024688720703125, 1.037811279296875, 0.16031265258789062, 0.1085052490234375, 0.28984832763671875, 0.05316162109375, 0.43939208984375, 0.5095062255859375, -0.6854095458984375, -1.5423126220703125, -1.31317138671875, 0.0, 0.440887451171875, -0.29149436950683594, -0.91192626953125, -0.5103302001953125, 0.036712646484375, 0.39083099365234375, 0.04993438720703125, -0.30136871337890625, -0.406829833984375, -0.49755859375, 0.261199951171875, 0.2986297607421875, 0.0893402099609375, 0.47430419921875, -0.163238525390625, -1.451873779296875, -0.03395843505859375, 0.37143707275390625, 0.19635009765625, -1.66766357421875, 0.10281181335449219, 0.157623291015625, -0.1554718017578125, 0.949462890625, 0.8126220703125, -0.37093544006347656, 0.30814552307128906, 1.325653076171875, -0.65771484375, -1.01287841796875, 0.372955322265625, 0.324493408203125, 0.4043922424316406, -0.624755859375, 0.9942626953125, -0.55645751953125, 0.593048095703125, 0.89288330078125, -2.18597412109375, 0.17608642578125, -0.2461090087890625, 0.6280288696289062, 0.099578857421875, 0.4464569091796875, -1.2215576171875, -0.8111572265625, -0.7204132080078125, 0.029979705810546875, 0.28327178955078125, 0.0378570556640625, 0.13741493225097656, -0.2713775634765625, 0.3167877197265625, -0.236541748046875, -1.8428955078125, -1.2806243896484375, -0.3089447021484375, -0.157470703125, 0.470062255859375, -0.09824371337890625, 0.58984375, 0.2720794677734375, 0.6050949096679688, 0.10106277465820312, 0.138580322265625, -1.3021240234375, 0.5884628295898438, -0.23987579345703125, -0.1830902099609375, 0.94390869140625, 0.3549041748046875, 1.346038818359375, -0.03078460693359375, -0.0405426025390625, 0.507598876953125, -0.04389190673828125, -1.35076904296875, -0.4222297668457031, -0.2154998779296875, -0.19510650634765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000013.npy"}
{"epoch": 0.027225130890052355, "step": 14, "batch_size": 128, "mean": 0.04815584421157837, "std": 0.7984738945960999, "min": -2.60882568359375, "p10": -0.8006713867187499, "median": 0.025834083557128906, "p90": 0.8884834289550777, "max": 2.58734130859375, "pos_frac": 0.5078125, "sample": [0.5639801025390625, 0.182952880859375, -0.07457733154296875, -1.56768798828125, 1.032470703125, 0.835906982421875, 0.9797592163085938, 0.49322509765625, -0.215911865234375, 0.624176025390625, 0.21411895751953125, 0.563507080078125, 0.0370941162109375, -0.4030303955078125, -0.596435546875, 0.30255126953125, -1.350128173828125, 1.2587890625, 1.4463348388671875, -0.083251953125, 0.054195404052734375, -0.6323089599609375, -0.5330810546875, -0.2904510498046875, 0.24059295654296875, 0.34079742431640625, -0.8720550537109375, -0.3156890869140625, 1.39422607421875, 0.48638916015625, -0.00832366943359375, 0.80255126953125, 0.5356521606445312, 0.1797637939453125, -0.22119140625, -0.012115478515625, -0.397369384765625, -0.4423408508300781, -0.0777587890625, 0.52301025390625, 0.24849700927734375, -0.3041706085205078, -0.3284912109375, 0.6570053100585938, 0.1921539306640625, 0.48480224609375, 0.744964599609375, -0.357696533203125, -0.265045166015625, 1.025604248046875, 0.5629196166992188, 0.5543060302734375, -0.131439208984375, 0.6676254272460938, 0.5781326293945312, 1.0213623046875, -0.88702392578125, -0.12871932983398438, -0.17487716674804688, -0.00710296630859375, -0.266021728515625, 2.102294921875, -0.974273681640625, -0.729095458984375, -0.038372039794921875, -0.248687744140625, 0.3398590087890625, 0.6837234497070312, 0.014574050903320312, 0.71435546875, 0.524139404296875, 2.58734130859375, -0.390472412109375, -1.12030029296875, -0.41973876953125, -0.0102691650390625, 0.0906829833984375, -0.317291259765625, 0.552001953125, 0.27978515625, -0.22207260131835938, 0.7222900390625, 0.202880859375, 0.24095916748046875, -0.5683326721191406, 1.400604248046875, 0.20316314697265625, -0.669342041015625, -0.1995086669921875, 0.2975273132324219, -0.441497802734375, 0.558380126953125, -0.7646484375, 0.03948974609375, 1.30535888671875, 0.354278564453125, -0.17236328125, -0.786956787109375, 2.3375244140625, 0.286224365234375, 0.0, 0.29209136962890625, -0.1169891357421875, -0.758880615234375, -1.689697265625, -1.547821044921875, -0.40899658203125, -0.15069580078125, 0.2601318359375, 1.397216796875, -0.206573486328125, -0.832672119140625, -0.015716552734375, -2.19384765625, 0.409027099609375, -0.0540313720703125, -0.1233062744140625, 0.1270751953125, 0.08966064453125, 0.849365234375, -2.60882568359375, -0.6498565673828125, 0.7406005859375, -2.019500732421875, -0.1414337158203125, 0.142974853515625, -1.448974609375, 0.174285888671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000014.npy"}
{"epoch": 0.02931937172774869, "step": 15, "batch_size": 128, "mean": -0.011715203523635864, "std": 0.6603440046310425, "min": -1.57720947265625, "p10": -0.8004776000976562, "median": -0.021383285522460938, "p90": 0.7633666992187499, "max": 1.9434814453125, "pos_frac": 0.4765625, "sample": [0.6649627685546875, 0.9423370361328125, -0.2137451171875, 1.885498046875, -0.79559326171875, 0.74951171875, -1.001861572265625, -0.2620849609375, -0.2424163818359375, 0.14538002014160156, -0.46254730224609375, 0.1414642333984375, 0.522430419921875, -0.783660888671875, 0.1238555908203125, -0.11394500732421875, 0.53961181640625, 0.4628753662109375, 0.78643798828125, -1.030487060546875, -0.12890243530273438, 1.454803466796875, 0.373046875, -1.44921875, -0.074951171875, -0.04058265686035156, 0.31829833984375, -1.109649658203125, -0.12750244140625, 0.8219451904296875, 0.7043304443359375, 0.35562896728515625, 0.209930419921875, 0.12186431884765625, -0.76873779296875, -0.126190185546875, -0.57391357421875, -1.17333984375, 0.594573974609375, 0.39183807373046875, -0.42840576171875, -0.3210906982421875, 0.138336181640625, -0.219390869140625, -1.1036376953125, 0.25079345703125, -0.08624267578125, 1.278778076171875, 0.015228271484375, 1.9434814453125, 0.31584930419921875, -0.01367950439453125, 0.2187347412109375, -1.508880615234375, -1.197357177734375, -0.64593505859375, 0.7061767578125, -0.473846435546875, 1.0237274169921875, 0.45526123046875, -0.12164306640625, 1.1062164306640625, 0.39562225341796875, 0.257232666015625, -0.6570587158203125, 0.23150634765625, 0.28392601013183594, -1.18695068359375, -0.0042724609375, 0.044097900390625, -0.546630859375, -0.49199676513671875, 0.9666213989257812, -0.649810791015625, -0.916900634765625, -0.4866943359375, -0.23992919921875, -0.0677490234375, -0.02893829345703125, -0.041748046875, -0.45123291015625, -0.321990966796875, 0.737060546875, -0.2470855712890625, 0.415374755859375, -0.033966064453125, -1.35357666015625, -0.2781410217285156, -0.11695480346679688, -1.57720947265625, -0.2317047119140625, -0.663421630859375, 0.60003662109375, 0.12671661376953125, 0.45380401611328125, -0.39599609375, -0.8118743896484375, 0.49275970458984375, 0.27835845947265625, -0.6490478515625, -0.629150390625, -0.72955322265625, -0.013828277587890625, 0.6102752685546875, 0.8648681640625, -0.21263885498046875, -0.471282958984375, 0.002655029296875, 0.02281951904296875, -0.14270401000976562, 0.222198486328125, -0.09719276428222656, 0.5229949951171875, 0.4882659912109375, 0.190673828125, 1.0857696533203125, 0.993621826171875, -0.19287872314453125, -0.34378814697265625, 0.46976470947265625, -0.6088981628417969, 0.08807373046875, 0.75347900390625, 0.49969482421875, -0.7347412109375, 0.01898193359375, 0.2093963623046875, -0.3624267578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000015.npy"}
{"epoch": 0.031413612565445025, "step": 16, "batch_size": 128, "mean": 0.09544751048088074, "std": 0.7384019494056702, "min": -3.03289794921875, "p10": -0.7480545043945311, "median": 0.08419227600097656, "p90": 1.0060592651367186, "max": 1.795501708984375, "pos_frac": 0.5859375, "sample": [0.0354766845703125, -0.3114585876464844, 0.2790374755859375, -0.52679443359375, 0.227264404296875, 1.086669921875, 0.81329345703125, 0.24212646484375, -2.022705078125, 0.223114013671875, 0.274383544921875, -0.11126708984375, 0.497894287109375, 0.043666839599609375, 0.0386810302734375, 0.630462646484375, 0.041229248046875, 1.2440185546875, -0.12073516845703125, 1.1775970458984375, -0.36876678466796875, 1.795501708984375, 0.090118408203125, -0.014772415161132812, 0.058837890625, 0.03003692626953125, 0.574737548828125, 1.1038818359375, 0.914947509765625, -0.4329833984375, -0.64013671875, 0.529632568359375, -0.730682373046875, 0.20684814453125, 0.28985595703125, 1.05059814453125, -0.57403564453125, 0.45641326904296875, -0.1865520477294922, 0.35193634033203125, -0.2113494873046875, 0.47849273681640625, -0.694610595703125, -0.19647216796875, -0.46344757080078125, -0.31512451171875, 0.53839111328125, 1.38763427734375, 0.7578125, -0.418060302734375, 0.07826614379882812, 0.82464599609375, -1.1248779296875, -0.155548095703125, 1.021392822265625, -0.0137176513671875, -0.9598388671875, 0.2882080078125, 0.0, 0.4178466796875, 0.7389068603515625, 0.81292724609375, 0.533905029296875, -0.082183837890625, -3.03289794921875, -1.320068359375, -0.18050384521484375, 0.5417137145996094, 0.5419158935546875, 0.2682342529296875, 0.0170135498046875, -0.54278564453125, -0.316741943359375, -0.2720222473144531, -0.65252685546875, 0.740478515625, 0.24050140380859375, -0.1383514404296875, -0.7885894775390625, -0.84686279296875, -0.4927978515625, -0.1263427734375, -0.722808837890625, 0.5694580078125, 0.40970611572265625, 0.00089263916015625, 0.51953125, 0.29734039306640625, 1.36395263671875, 1.54815673828125, 0.3225250244140625, 0.04638671875, -0.08807373046875, 0.1192779541015625, 0.951019287109375, -0.7198486328125, 0.8266143798828125, -0.346710205078125, -0.4356689453125, 0.5797882080078125, 1.1628570556640625, -0.2598381042480469, -1.320587158203125, 0.3135833740234375, -0.92230224609375, 0.473480224609375, -1.17474365234375, -0.329010009765625, 0.728912353515625, 0.24883651733398438, -0.0474700927734375, -1.501556396484375, -1.03546142578125, 0.957855224609375, 0.6987380981445312, 1.275970458984375, 0.39007568359375, -0.445526123046875, 1.013397216796875, 0.36115264892578125, -0.287322998046875, 0.39825439453125, 0.07550048828125, 1.0029144287109375, -1.23931884765625, -0.35296630859375, 0.6689453125, 0.971435546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000016.npy"}
{"epoch": 0.033507853403141365, "step": 17, "batch_size": 128, "mean": 0.011074408888816833, "std": 0.7554517388343811, "min": -2.263336181640625, "p10": -0.7597923278808594, "median": -0.0564422607421875, "p90": 0.995135498046875, "max": 2.7623291015625, "pos_frac": 0.4765625, "sample": [0.8711471557617188, -0.2275238037109375, 1.749755859375, -0.4045867919921875, -0.0506744384765625, 0.6221923828125, 0.2159271240234375, 0.4579429626464844, 0.42340087890625, -0.712890625, -0.785064697265625, -0.1134033203125, -1.355194091796875, 0.81976318359375, 0.472930908203125, -0.28746795654296875, -1.4149169921875, 0.1031494140625, -0.77899169921875, 0.22982025146484375, -0.6233978271484375, -1.750762939453125, 0.28467559814453125, 0.992462158203125, 2.7623291015625, 0.131195068359375, -0.489044189453125, -0.124725341796875, -2.263336181640625, -0.3096160888671875, 0.607208251953125, 0.750396728515625, 0.016632080078125, 0.0274200439453125, 0.973602294921875, 1.33941650390625, 1.03277587890625, 0.055206298828125, 0.13513565063476562, 0.008289337158203125, -0.9405517578125, -2.0104522705078125, -0.153411865234375, 0.34197998046875, -0.3541259765625, 2.0118255615234375, -0.5417556762695312, -0.4325714111328125, -0.2801971435546875, -0.0622100830078125, 1.270660400390625, -0.743316650390625, 0.3674430847167969, 0.91619873046875, 1.41204833984375, -0.279541015625, 0.30669403076171875, 0.56463623046875, -0.26702880859375, -0.529449462890625, -0.7332611083984375, -0.9542236328125, -0.654998779296875, -0.47747802734375, -0.06781005859375, -0.1004791259765625, 0.2994537353515625, -0.6686248779296875, 0.67919921875, -0.7975921630859375, 0.3216705322265625, -0.473907470703125, -0.15957069396972656, -0.443511962890625, 0.55517578125, 0.16522216796875, 0.35443878173828125, 0.1478118896484375, 1.0843658447265625, 0.38836669921875, -0.41925048828125, 0.0037994384765625, 0.70263671875, -0.185302734375, -0.3235321044921875, 0.103424072265625, -0.041168212890625, -0.093902587890625, 0.811431884765625, 1.001373291015625, -0.14555931091308594, 0.506622314453125, -0.1089324951171875, -0.52484130859375, -0.55389404296875, -0.06594467163085938, 1.2470703125, -0.949615478515625, 0.546539306640625, 0.48160552978515625, 0.0184173583984375, -0.7515640258789062, -0.25359344482421875, 0.2014904022216797, -0.20867919921875, -0.600341796875, 0.643646240234375, -1.4244384765625, -0.13990020751953125, 0.07413482666015625, -0.15509033203125, -0.16013336181640625, -0.6732635498046875, -0.34066009521484375, -0.302490234375, -0.044734954833984375, -1.494903564453125, 1.200103759765625, -0.567718505859375, 0.1169586181640625, -0.18454742431640625, 1.073883056640625, -0.4514617919921875, 0.098968505859375, 1.06683349609375, 0.292877197265625, 0.100799560546875, -0.16192626953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000017.npy"}
{"epoch": 0.0356020942408377, "step": 18, "batch_size": 128, "mean": 0.09068909287452698, "std": 0.7952203154563904, "min": -1.828338623046875, "p10": -0.8435821533203124, "median": 0.10802078247070312, "p90": 1.0424911499023435, "max": 2.2301025390625, "pos_frac": 0.546875, "sample": [-0.1753692626953125, 0.5115966796875, -0.07033920288085938, -0.385528564453125, -0.0076141357421875, 0.528839111328125, 0.166015625, -0.16367340087890625, -0.73529052734375, -0.0223388671875, 0.2978515625, 0.97052001953125, 0.376556396484375, -0.40631103515625, -0.093353271484375, 0.7733154296875, -0.4609375, 0.861724853515625, 2.0433349609375, 1.26190185546875, -0.23266220092773438, -0.1364898681640625, 0.350494384765625, -0.44080352783203125, -0.81414794921875, 0.2447357177734375, -0.06671142578125, 0.3909149169921875, -0.29461669921875, 1.32720947265625, 0.3668212890625, -1.16339111328125, 0.18886566162109375, -1.8270263671875, 0.20058441162109375, -0.5234489440917969, 0.00079345703125, 0.4295387268066406, 0.01621246337890625, -1.18060302734375, -1.617889404296875, 1.0904388427734375, 1.5361328125, -0.595611572265625, -0.44720458984375, 0.6145782470703125, 0.95123291015625, 0.40545654296875, 0.804931640625, 1.89605712890625, 0.20063400268554688, -0.13385772705078125, -0.130859375, -0.247802734375, -1.436920166015625, 2.20904541015625, -0.3025054931640625, -0.5367965698242188, 0.1954803466796875, -0.384307861328125, 0.1642913818359375, -0.295684814453125, 1.13946533203125, -0.912261962890625, 0.17869186401367188, -0.18508529663085938, 1.36688232421875, 0.223114013671875, 0.42183685302734375, 1.751434326171875, 2.2301025390625, -1.6630859375, 1.45526123046875, 0.256103515625, 0.5258331298828125, 0.2484283447265625, -0.33538818359375, -0.0360107421875, -0.298797607421875, -0.21832275390625, -0.26483154296875, -1.0931396484375, -0.6628265380859375, 0.17966842651367188, 0.12111663818359375, -1.42633056640625, 0.789794921875, -0.212493896484375, -0.46807861328125, 0.2995758056640625, 0.494354248046875, 0.1482086181640625, 0.460723876953125, -0.56732177734375, 0.744964599609375, -0.020751953125, 0.121063232421875, 0.741943359375, 0.0044097900390625, 0.194122314453125, 0.77886962890625, 0.32138824462890625, -0.338226318359375, 0.0770111083984375, 0.46124267578125, -0.07246780395507812, -0.691253662109375, -0.3291015625, -0.009368896484375, -0.6863174438476562, -1.3519287109375, 1.021942138671875, 0.339569091796875, 0.37880706787109375, 0.09497833251953125, -1.5729827880859375, -0.043487548828125, 0.63677978515625, 0.549774169921875, 0.46833038330078125, 0.648193359375, 0.9937744140625, -1.5040283203125, 0.43572998046875, 1.20892333984375, -0.2276611328125, 0.069671630859375, -1.828338623046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000018.npy"}
{"epoch": 0.03769633507853403, "step": 19, "batch_size": 128, "mean": 0.10697628557682037, "std": 0.7209071516990662, "min": -2.25653076171875, "p10": -0.7287994384765625, "median": 0.11260986328125, "p90": 0.9422790527343747, "max": 2.677703857421875, "pos_frac": 0.59375, "sample": [0.44994354248046875, -0.28143310546875, -0.97796630859375, 0.77294921875, 0.414276123046875, 0.1448974609375, 0.2357940673828125, 0.4777107238769531, -0.16973876953125, 0.34326171875, 0.1092376708984375, -0.036163330078125, -0.38653564453125, 0.29149627685546875, -0.27010345458984375, 1.674560546875, 0.8386993408203125, 0.0114288330078125, -0.91094970703125, 0.5227813720703125, -0.1309051513671875, -0.16078948974609375, -0.5975379943847656, 0.3770103454589844, 0.1159820556640625, -0.26104736328125, 0.023553848266601562, -0.286041259765625, -1.391510009765625, -0.907623291015625, -0.45849609375, 0.92010498046875, 0.2650146484375, 1.7457275390625, 0.850067138671875, 0.525970458984375, 1.5556640625, 1.03558349609375, 0.2044525146484375, 0.13409805297851562, 0.17950439453125, 0.32708740234375, -0.03891944885253906, -0.628570556640625, 1.864166259765625, -0.14678955078125, 0.2835693359375, -0.34979248046875, 0.6030426025390625, 0.286834716796875, 1.329986572265625, 0.4918212890625, -0.06439971923828125, 0.0968475341796875, 1.4667510986328125, -0.1932373046875, 0.448699951171875, 0.037139892578125, 0.4561595916748047, 1.99114990234375, 2.677703857421875, -0.142364501953125, 0.32916259765625, -0.3652496337890625, -0.32720947265625, -1.041046142578125, -0.24705886840820312, -0.5431728363037109, -1.6484756469726562, -0.13060951232910156, -0.820526123046875, -0.0119781494140625, 0.5172882080078125, 0.4647369384765625, 0.21191024780273438, 0.025726318359375, -0.764495849609375, -0.00946044921875, 0.052730560302734375, 0.10689544677734375, 0.604827880859375, -0.55584716796875, -0.4851531982421875, -0.8517303466796875, -0.40283203125, 0.600982666015625, -0.26177978515625, 0.08966064453125, 0.160919189453125, 1.002197265625, -2.25653076171875, 1.399169921875, -0.81597900390625, -0.41278076171875, 0.9940185546875, 0.159942626953125, 0.2968597412109375, -0.7135009765625, 0.1630401611328125, -0.102325439453125, 0.03076171875, -0.36163330078125, 0.16811752319335938, 0.69610595703125, -0.17962646484375, 0.1188812255859375, 0.546905517578125, -0.5804443359375, 0.15053939819335938, -0.38173675537109375, 0.2054901123046875, 0.11721038818359375, 0.626495361328125, 0.289276123046875, 0.0179595947265625, 0.1400146484375, 0.6538238525390625, -0.0619354248046875, -0.4022064208984375, 0.07744598388671875, 0.391693115234375, -1.75714111328125, 1.155548095703125, 0.418609619140625, -1.0151748657226562, -0.3088226318359375, 0.483428955078125, 0.2512359619140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000019.npy"}
{"epoch": 0.039790575916230364, "step": 20, "batch_size": 128, "mean": 0.15320217609405518, "std": 0.6666854619979858, "min": -1.90875244140625, "p10": -0.5432460784912109, "median": 0.12400054931640625, "p90": 0.9714874267578124, "max": 1.89459228515625, "pos_frac": 0.5703125, "sample": [0.1243896484375, 0.4029541015625, 0.519012451171875, -0.2940673828125, 0.003406524658203125, -0.158782958984375, 0.20025634765625, -0.874664306640625, 0.243011474609375, 1.52850341796875, 0.06567001342773438, 0.22031021118164062, -0.666717529296875, 0.1236114501953125, 0.553192138671875, 0.2159576416015625, 0.243255615234375, 1.115203857421875, 0.411224365234375, 0.3907012939453125, -0.4458770751953125, 0.17055511474609375, -0.00817108154296875, 0.15210723876953125, -0.915008544921875, 0.7081146240234375, 0.20145034790039062, 0.69921875, 0.6358413696289062, 0.58331298828125, -0.38812255859375, -0.09727096557617188, -1.2245941162109375, 1.149993896484375, 0.2837066650390625, 0.173309326171875, -0.94757080078125, 0.110870361328125, -0.3472900390625, 0.913787841796875, -0.6822052001953125, -1.03753662109375, -0.33221435546875, 0.296630859375, 0.0, -0.246185302734375, 0.811798095703125, 0.2873992919921875, 0.958770751953125, -0.0145263671875, 1.00115966796875, -0.37664794921875, -0.5145263671875, 0.50799560546875, -0.15979766845703125, 0.5415534973144531, -0.03350830078125, -0.418609619140625, 0.6832275390625, -0.41455078125, 1.89459228515625, -0.221771240234375, 0.857177734375, 0.18410873413085938, 1.7066650390625, -0.7842178344726562, -0.792572021484375, -0.3257293701171875, 0.646636962890625, 0.0601806640625, -0.0292816162109375, 0.364990234375, -0.018548965454101562, -0.2671546936035156, 1.31829833984375, -0.459442138671875, 0.038150787353515625, 0.1849365234375, 0.949371337890625, -0.1620025634765625, 0.6055450439453125, -0.2962188720703125, 0.200469970703125, -0.2222900390625, 1.479095458984375, 0.68609619140625, 0.428497314453125, 1.81976318359375, -0.15919113159179688, -1.90875244140625, 0.722076416015625, 1.47412109375, 0.50823974609375, 0.586212158203125, 0.37188720703125, 0.5113677978515625, -0.4007415771484375, 0.219757080078125, -0.73370361328125, -0.1821441650390625, 0.3970489501953125, -0.046142578125, 0.427215576171875, -0.4827880859375, -0.5535774230957031, -0.538818359375, 1.484100341796875, -0.16845703125, -0.14702987670898438, 0.34649658203125, -0.338226318359375, -1.5098876953125, -0.2801532745361328, 0.05242919921875, -0.3826751708984375, 0.25982666015625, -0.04933929443359375, -0.1661376953125, 0.1324462890625, 0.089996337890625, 1.68939208984375, -0.492889404296875, 0.0626373291015625, 1.462310791015625, -0.2598533630371094, 0.90655517578125, 0.295166015625, -0.0672607421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000020.npy"}
{"epoch": 0.041884816753926704, "step": 21, "batch_size": 128, "mean": 0.009120747447013855, "std": 0.8067171573638916, "min": -2.64617919921875, "p10": -0.8848602294921875, "median": 0.06168365478515625, "p90": 0.8783538818359375, "max": 2.334716796875, "pos_frac": 0.5234375, "sample": [-0.732025146484375, 0.83905029296875, -0.04974365234375, 0.4530792236328125, -0.776947021484375, 1.4653396606445312, -0.14210128784179688, -0.35390472412109375, -0.233154296875, -0.3689422607421875, -0.848876953125, 0.015378952026367188, 0.48636627197265625, 0.1499176025390625, -0.10463714599609375, 0.10455322265625, 0.49853515625, -0.547760009765625, -0.582977294921875, -0.0967864990234375, 0.8156280517578125, 0.0948486328125, 0.158294677734375, 0.4027099609375, -0.018463134765625, -0.3156585693359375, 1.69000244140625, -0.0779266357421875, -0.557342529296875, 1.36651611328125, 0.23482513427734375, -0.3798828125, 0.247894287109375, 0.21551513671875, -1.11871337890625, -0.582489013671875, 1.0153274536132812, -0.505401611328125, -2.64617919921875, 2.334716796875, 1.158172607421875, 0.11032676696777344, 0.65631103515625, -1.655609130859375, 0.0, 0.5896453857421875, 0.489990234375, -0.38332366943359375, -0.4240875244140625, -1.148345947265625, -0.2330322265625, -0.9810333251953125, -0.7978515625, 0.698638916015625, -0.346435546875, 0.1687164306640625, 0.642059326171875, 0.466705322265625, 0.35253143310546875, -0.20326995849609375, 0.656463623046875, -0.513275146484375, 0.877960205078125, 0.4169921875, 0.4225616455078125, 0.2176361083984375, 0.217620849609375, -1.78839111328125, 0.0440216064453125, -0.94866943359375, 1.252227783203125, 0.6982765197753906, 0.0012054443359375, -1.807159423828125, -1.090087890625, -0.436553955078125, -1.41693115234375, -0.365142822265625, -0.046825408935546875, 0.08075714111328125, 1.23248291015625, 1.947265625, -0.3821258544921875, -0.7728271484375, -0.57421875, -1.41143798828125, 0.4798431396484375, -0.8340606689453125, 0.3234100341796875, 0.6460952758789062, 0.262420654296875, -0.2548828125, -2.481689453125, 0.681396484375, 0.37384796142578125, 0.8524169921875, 0.649017333984375, -0.66546630859375, 0.647430419921875, 0.6567306518554688, -0.01142120361328125, 0.794464111328125, -0.03516578674316406, -1.40740966796875, -0.196502685546875, 0.1697235107421875, 0.24603271484375, -0.584014892578125, 0.079345703125, -0.857513427734375, -0.3447761535644531, -0.70745849609375, 0.60552978515625, 0.8792724609375, 1.113861083984375, 0.9771728515625, 1.020416259765625, 0.41717529296875, -0.46478271484375, -0.53692626953125, -0.27349853515625, 0.2659034729003906, 0.5511474609375, 0.7307052612304688, 0.7718048095703125, -0.2784423828125, 0.131134033203125, -0.44535064697265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000021.npy"}
{"epoch": 0.04397905759162304, "step": 22, "batch_size": 128, "mean": 0.04826001822948456, "std": 0.7436685562133789, "min": -2.411376953125, "p10": -0.80281982421875, "median": 0.047611236572265625, "p90": 0.9694046020507809, "max": 1.92340087890625, "pos_frac": 0.53125, "sample": [0.23724365234375, -0.044696807861328125, 0.1196746826171875, -0.0482177734375, 0.0, 0.93450927734375, 0.03189849853515625, -0.78656005859375, -1.105377197265625, -0.574249267578125, 0.4469146728515625, -0.3056793212890625, -2.26611328125, -0.4600067138671875, 0.3501625061035156, 0.3880462646484375, -0.04511833190917969, 0.2794761657714844, 1.212646484375, 0.379791259765625, -2.411376953125, -1.594573974609375, -0.234619140625, -0.9539794921875, 0.165618896484375, 0.28794097900390625, -0.35467529296875, 0.1331634521484375, -0.08803558349609375, -0.04907989501953125, 0.06786155700683594, -0.6497802734375, 1.92340087890625, 0.029937744140625, 0.08172607421875, -0.2729339599609375, 0.068023681640625, 1.0508270263671875, 0.5325775146484375, 0.5569610595703125, 0.4439544677734375, -0.38083648681640625, 0.279327392578125, 0.70111083984375, 0.30865478515625, 0.8113479614257812, -0.3019561767578125, 0.7463531494140625, 0.6265640258789062, -0.183441162109375, -0.32051658630371094, -0.108551025390625, -0.93609619140625, -0.3183860778808594, 0.529541015625, 0.381744384765625, 0.8243560791015625, 1.28729248046875, -0.5587615966796875, -0.454071044921875, -0.1262969970703125, -1.3551025390625, -0.378204345703125, 1.213836669921875, -0.789947509765625, 0.53839111328125, 1.0714111328125, 0.03319549560546875, 1.23394775390625, -0.192779541015625, 0.0620269775390625, 1.7950439453125, 0.02197265625, -0.2835845947265625, -0.640380859375, 1.355224609375, 0.270355224609375, 1.232086181640625, 1.394256591796875, 0.40167236328125, 0.385406494140625, 0.294891357421875, -0.832855224609375, 0.5214385986328125, -0.46550750732421875, -0.16082763671875, -0.4670562744140625, 0.7632598876953125, 0.276611328125, 0.117584228515625, 0.7196807861328125, 0.13848114013671875, -0.163726806640625, -0.632781982421875, -0.468902587890625, -0.8464508056640625, -0.651458740234375, 0.1853923797607422, -0.338531494140625, 0.715087890625, 0.15256500244140625, -0.1772003173828125, 1.094970703125, 0.5362396240234375, 0.7432632446289062, 0.7177276611328125, -0.44671630859375, -0.349609375, 0.895355224609375, -0.111907958984375, 1.52215576171875, -0.0150299072265625, 0.34149169921875, 0.36749267578125, -0.2376708984375, 0.55487060546875, -1.59136962890625, -1.4346771240234375, -0.2742958068847656, 0.445220947265625, -0.49582672119140625, -0.28924560546875, 0.553558349609375, -1.219970703125, 0.21002197265625, -1.537139892578125, -0.11860466003417969, -0.016204833984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000022.npy"}
{"epoch": 0.04607329842931937, "step": 23, "batch_size": 128, "mean": 0.15114274621009827, "std": 0.848679780960083, "min": -1.9569091796875, "p10": -0.7756057739257812, "median": 0.14914894104003906, "p90": 0.9502532958984373, "max": 3.3284912109375, "pos_frac": 0.578125, "sample": [-0.004062652587890625, 0.8525848388671875, 0.7453155517578125, 0.9144287109375, 0.34851837158203125, -0.3417930603027344, -0.0179443359375, 0.991119384765625, 0.3394775390625, -1.51873779296875, -0.5503177642822266, 0.10129547119140625, 0.77288818359375, 0.42670440673828125, -0.2957611083984375, 0.11794471740722656, -1.23779296875, -0.1740264892578125, -0.06402587890625, -1.42291259765625, -0.71551513671875, -0.174774169921875, 0.1965179443359375, 3.0235595703125, -0.1115875244140625, 0.0186004638671875, -0.2508087158203125, 0.43138885498046875, 0.5472259521484375, 0.46044921875, 0.15042495727539062, 0.873779296875, 0.613555908203125, -1.4033203125, -0.529327392578125, 1.69036865234375, -0.5910606384277344, 0.25616455078125, 0.454803466796875, 0.9327392578125, 0.15428543090820312, -1.180908203125, 0.19140625, 0.393768310546875, -0.341949462890625, -0.2676849365234375, 0.79119873046875, -1.0028076171875, 0.33380126953125, -1.744171142578125, 0.5814971923828125, 0.3409614562988281, -0.701324462890625, 0.658416748046875, 0.6003494262695312, -0.7978973388671875, 0.163421630859375, 0.74395751953125, 0.1369171142578125, -0.080902099609375, 2.216949462890625, 0.2003631591796875, -0.256500244140625, -1.9569091796875, 0.9227294921875, -1.3101806640625, 0.20794677734375, -0.6925048828125, 0.7167205810546875, 1.483734130859375, -1.33660888671875, -0.1598968505859375, 1.0414276123046875, 0.05988311767578125, -0.421112060546875, 0.58258056640625, 0.2235565185546875, -0.2316436767578125, 0.26319122314453125, 0.309906005859375, 0.498687744140625, 1.29779052734375, 1.073272705078125, 0.4756317138671875, -0.126434326171875, 0.57427978515625, -0.76605224609375, 0.7288970947265625, 0.5160369873046875, 0.905487060546875, -0.18048858642578125, -0.28166961669921875, -0.4573974609375, -0.3650665283203125, -0.032196044921875, 0.649078369140625, 0.4175567626953125, 1.44512939453125, 2.340362548828125, 0.052799224853515625, 0.228851318359375, 1.5050048828125, 0.361328125, -0.4760093688964844, -0.6315383911132812, -0.29570770263671875, 0.0, -0.29982757568359375, -0.29229736328125, -0.4528350830078125, 0.9119873046875, -0.27094268798828125, 3.3284912109375, 0.10164260864257812, 0.1478729248046875, -1.2050018310546875, 0.4647674560546875, -0.367462158203125, 0.19731903076171875, 1.848663330078125, 0.875762939453125, 0.11727142333984375, 0.0121002197265625, -0.12890625, -1.462677001953125, 0.1800079345703125, -0.317718505859375, -0.1916351318359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000023.npy"}
{"epoch": 0.048167539267015703, "step": 24, "batch_size": 128, "mean": 0.3028638958930969, "std": 0.8403819799423218, "min": -2.3878173828125, "p10": -0.6899566650390625, "median": 0.203887939453125, "p90": 1.2845077514648435, "max": 2.6141204833984375, "pos_frac": 0.6328125, "sample": [0.6066665649414062, 1.6378021240234375, 0.4818229675292969, -0.8512115478515625, 1.64532470703125, -0.5152587890625, 0.31256103515625, 0.919158935546875, -0.8876953125, 2.6141204833984375, -0.11468505859375, -0.0022716522216796875, 0.274017333984375, 0.8108978271484375, 0.788665771484375, 0.6587066650390625, 0.727294921875, 0.6487274169921875, 0.08889007568359375, 0.0090484619140625, -1.45050048828125, -0.4468536376953125, -0.25408935546875, 2.083038330078125, -0.2715911865234375, -0.90069580078125, -0.495819091796875, 0.783660888671875, 1.2686004638671875, 0.9945068359375, 0.186431884765625, -0.6622314453125, 1.184051513671875, 1.22760009765625, -0.53106689453125, 0.1185302734375, 1.5396728515625, 2.476776123046875, 0.00736236572265625, 1.60321044921875, 0.78216552734375, 0.0584716796875, 0.110198974609375, 0.0, 0.6612472534179688, -0.38709259033203125, 2.22943115234375, 0.0413055419921875, 0.012176513671875, -0.386199951171875, -0.1270751953125, -0.209259033203125, -0.7225341796875, 0.49102783203125, -0.341217041015625, 0.221343994140625, 0.614349365234375, -0.88421630859375, 1.04144287109375, -2.3878173828125, -1.3773193359375, 0.1008453369140625, 1.390655517578125, 0.3425750732421875, 0.2627372741699219, 2.01568603515625, -0.0653533935546875, 0.10282135009765625, -0.497222900390625, 0.23371124267578125, -0.3674201965332031, -1.30828857421875, -0.08734130859375, 0.13502883911132812, 0.676116943359375, -0.02069091796875, 0.95428466796875, -0.15433883666992188, 1.218994140625, -0.675994873046875, -0.6455612182617188, 0.616851806640625, 1.321624755859375, 0.3636474609375, -0.3112030029296875, 0.7357177734375, 1.1058349609375, 1.15185546875, 1.5998382568359375, 0.28379058837890625, 0.9727783203125, 0.49365234375, 1.1673583984375, 1.17218017578125, 0.163665771484375, 1.02984619140625, -0.758544921875, 0.0263824462890625, 0.306243896484375, 0.183349609375, 0.541778564453125, 0.82989501953125, 0.0, 1.40625, -0.14046478271484375, 1.17919921875, -0.7901611328125, 0.0011138916015625, -0.848419189453125, -0.108642578125, 1.0457763671875, 0.6874008178710938, -0.3038330078125, 0.508087158203125, 0.332305908203125, 0.7749786376953125, 0.9010162353515625, 0.9468994140625, 1.1922607421875, 0.2462310791015625, -0.221710205078125, -0.05548095703125, -0.10626220703125, 0.056415557861328125, -1.7835693359375, -0.1371307373046875, -0.039764404296875, -0.337310791015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000024.npy"}
{"epoch": 0.050261780104712044, "step": 25, "batch_size": 128, "mean": 0.14042150974273682, "std": 0.6720383167266846, "min": -1.89556884765625, "p10": -0.6580810546875, "median": 0.1896686553955078, "p90": 0.8322174072265625, "max": 2.146728515625, "pos_frac": 0.6171875, "sample": [-0.166168212890625, 0.1843414306640625, 0.561004638671875, -0.06756591796875, 0.0, 0.23352432250976562, -0.233184814453125, 0.2245197296142578, -0.7061614990234375, -0.2538909912109375, 1.65966796875, -0.4432830810546875, 0.28094482421875, 1.3871002197265625, 0.021526336669921875, -0.477630615234375, -0.09643936157226562, -0.2292327880859375, 1.00946044921875, 0.2275390625, 0.7365875244140625, 0.7745361328125, -0.2281036376953125, 0.3190765380859375, -0.782745361328125, 0.731109619140625, -0.3621826171875, -0.754974365234375, -0.1263427734375, 0.0425262451171875, 0.1343994140625, -0.1869964599609375, -0.4754638671875, 0.7060546875, 0.210723876953125, 0.694091796875, -0.419097900390625, -0.017608642578125, -0.650390625, 0.271881103515625, 0.1085205078125, 0.029876708984375, 0.15990447998046875, 0.456939697265625, 0.4389190673828125, -0.12564468383789062, -0.264007568359375, 0.6212921142578125, -1.677947998046875, 0.8330078125, 1.1109466552734375, 1.0485076904296875, 0.0543365478515625, -0.13897705078125, -0.0433807373046875, -0.18332672119140625, -0.2712860107421875, 0.4473876953125, 0.19499588012695312, 0.25863075256347656, -0.181793212890625, 0.6560134887695312, -0.2405853271484375, 0.092559814453125, 0.5047607421875, 0.476898193359375, 0.476043701171875, -0.604949951171875, -0.85345458984375, 0.0, 0.51702880859375, 1.0711669921875, 0.274932861328125, -0.57537841796875, -1.090545654296875, 0.01226806640625, -0.34059906005859375, 0.6878662109375, -0.186737060546875, -1.7926788330078125, 0.8222808837890625, -1.311676025390625, 0.3897705078125, 0.052215576171875, 0.60595703125, 1.619293212890625, 0.28594970703125, 0.3094940185546875, -0.16511917114257812, 1.54168701171875, 0.115386962890625, 0.8157196044921875, 0.0079803466796875, 0.30069541931152344, -1.89556884765625, 0.1998748779296875, 0.6683197021484375, -1.305389404296875, -0.1735382080078125, 0.47052001953125, -0.56207275390625, 0.971435546875, -1.0031585693359375, 0.73968505859375, 0.24639892578125, 0.5753173828125, 0.314239501953125, 0.7893905639648438, 0.6417465209960938, -0.20558929443359375, 0.831878662109375, 2.146728515625, 0.230194091796875, 0.13253402709960938, -0.901397705078125, -0.676025390625, 0.6830902099609375, -0.400634765625, 0.2437591552734375, 1.026031494140625, 0.58843994140625, -0.6248779296875, 1.482452392578125, 0.39593505859375, 0.28619384765625, 0.0708160400390625, 0.70684814453125, 0.1960773468017578], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000025.npy"}
{"epoch": 0.05235602094240838, "step": 26, "batch_size": 128, "mean": 0.11090953648090363, "std": 0.9344504475593567, "min": -2.6765899658203125, "p10": -0.8343460083007812, "median": 0.008434295654296875, "p90": 1.1225738525390625, "max": 4.11602783203125, "pos_frac": 0.5078125, "sample": [4.11602783203125, -0.3455047607421875, 1.13037109375, 0.0779266357421875, 0.20644378662109375, 0.08587646484375, -0.1148681640625, 3.01397705078125, 0.5496826171875, -0.2799644470214844, -0.97900390625, 0.00177001953125, 0.0710296630859375, 0.01509857177734375, 1.8521728515625, 0.29913330078125, -0.0813140869140625, 0.3679962158203125, -1.0765380859375, -0.7996826171875, 0.0928802490234375, -0.13613510131835938, 0.37917518615722656, 0.2710113525390625, 0.961700439453125, 0.478851318359375, 0.28558349609375, -0.116180419921875, -1.2466278076171875, 0.1511688232421875, 0.867279052734375, -0.496124267578125, 2.22662353515625, 0.13733673095703125, -0.11708641052246094, -0.6955108642578125, -0.76873779296875, -2.6765899658203125, -0.570526123046875, -0.296539306640625, 0.828704833984375, -0.8061065673828125, -0.15708541870117188, 1.6011962890625, -0.0672760009765625, 0.7271728515625, 0.605133056640625, -0.020053863525390625, 2.39398193359375, 0.30535125732421875, -0.07535552978515625, -0.4516639709472656, -0.4197998046875, 0.17812347412109375, 0.042388916015625, 0.5595245361328125, 0.739288330078125, -0.1546630859375, 0.2856292724609375, 0.947784423828125, -1.164581298828125, 1.058502197265625, -0.51800537109375, -0.694976806640625, -0.345947265625, 0.0654144287109375, 0.08305740356445312, 1.10565185546875, 0.920684814453125, 0.20695114135742188, -0.24236297607421875, -1.1163330078125, -0.15185546875, 0.2288818359375, 0.30218505859375, -0.0532073974609375, -0.103057861328125, -0.7942352294921875, 0.220703125, -0.3455810546875, 1.7086181640625, 1.119232177734375, 0.11248779296875, 0.4578285217285156, 0.611419677734375, -1.577178955078125, 0.3902435302734375, 0.0, 0.9473495483398438, -0.6283187866210938, 1.52740478515625, -0.15654754638671875, -0.1632080078125, -0.900238037109375, -0.453125, 0.294189453125, 0.0, -0.5846786499023438, -0.156097412109375, -0.6755905151367188, -1.09649658203125, -1.315521240234375, 0.5447540283203125, -0.1998138427734375, 0.37506103515625, -0.26245880126953125, -0.0483856201171875, -0.549835205078125, -0.2532768249511719, 0.839385986328125, 1.29278564453125, 0.7566986083984375, -0.5984039306640625, -0.2238941192626953, 0.17315673828125, -0.6426048278808594, -1.47784423828125, -0.5023155212402344, 1.1729583740234375, -0.0252838134765625, 0.3207206726074219, 1.05218505859375, 2.583282470703125, 1.5162811279296875, -0.6916656494140625, -1.128936767578125, -2.05035400390625, 0.19610595703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000026.npy"}
{"epoch": 0.05445026178010471, "step": 27, "batch_size": 128, "mean": 0.20181477069854736, "std": 0.8579765558242798, "min": -2.4207763671875, "p10": -0.74169921875, "median": 0.18629074096679688, "p90": 1.3170104980468746, "max": 3.484619140625, "pos_frac": 0.6328125, "sample": [0.447967529296875, -0.41583251953125, -0.225128173828125, -0.07563400268554688, 0.6166839599609375, 0.28143310546875, 0.9336090087890625, 0.654510498046875, -1.101806640625, 0.17127227783203125, 0.03521728515625, 1.083648681640625, 0.705291748046875, 0.5891876220703125, 0.3309326171875, 0.247711181640625, 0.30185699462890625, 0.5192794799804688, 1.70404052734375, 0.21712112426757812, -0.765625, 0.08715057373046875, 0.2705535888671875, -0.113555908203125, 0.2472991943359375, 1.39361572265625, 0.104400634765625, -0.3759765625, -0.05263710021972656, 0.59136962890625, 0.10681915283203125, 0.2196807861328125, -0.7314453125, -0.65228271484375, -0.1683807373046875, 0.775146484375, -0.02307891845703125, 0.4586181640625, 0.665496826171875, 0.72796630859375, -0.7040557861328125, 0.5653266906738281, 1.2841796875, -0.381744384765625, 0.30284881591796875, 0.40656280517578125, 1.467071533203125, 2.102325439453125, -0.3807373046875, -0.12896728515625, -2.4207763671875, -0.81353759765625, 0.407196044921875, -0.6795444488525391, -0.03105926513671875, 2.58758544921875, -0.660614013671875, 1.21099853515625, 0.136016845703125, 0.09972190856933594, 0.777252197265625, 0.7215385437011719, -0.6580963134765625, 0.32598876953125, 0.0105438232421875, 0.0370635986328125, -0.586578369140625, -0.804351806640625, 0.6231689453125, 0.28118896484375, 0.841064453125, 0.7270050048828125, 0.079132080078125, 0.284637451171875, -0.4715461730957031, 0.1070556640625, 0.4342041015625, 1.94256591796875, 0.001068115234375, 3.484619140625, -1.563690185546875, -1.35089111328125, 0.18820953369140625, -0.779205322265625, -0.4341144561767578, 1.44622802734375, -0.3133544921875, 0.2557411193847656, -0.16652297973632812, 0.248565673828125, 1.4457550048828125, -0.2471923828125, 1.626495361328125, 0.247833251953125, 1.123809814453125, -0.916107177734375, 0.3603515625, -0.2884063720703125, -1.8289794921875, 0.71435546875, -0.3834075927734375, 2.439208984375, 0.562469482421875, -0.165924072265625, -0.07904815673828125, -0.1210479736328125, 0.13525390625, 0.062713623046875, -1.53875732421875, -0.924407958984375, -0.240081787109375, 0.244384765625, 1.741790771484375, 1.828643798828125, -0.69732666015625, 0.49468994140625, -0.2170867919921875, 0.2650127410888672, 0.369354248046875, 0.11693382263183594, -0.63970947265625, -0.4743499755859375, 0.0312042236328125, -0.8980712890625, 0.943267822265625, 0.1843719482421875, 0.46575927734375, 0.2447509765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000027.npy"}
{"epoch": 0.05654450261780105, "step": 28, "batch_size": 128, "mean": 0.1473444700241089, "std": 0.8910412788391113, "min": -2.7093505859375, "p10": -0.9461303710937499, "median": 0.15178680419921875, "p90": 0.9210212707519528, "max": 4.3232421875, "pos_frac": 0.59375, "sample": [0.34521484375, -2.7093505859375, 0.548370361328125, 1.894378662109375, -0.84906005859375, -0.3911590576171875, 0.03179931640625, 0.0326995849609375, 0.4355926513671875, 0.36286163330078125, 1.240386962890625, 0.089874267578125, 0.50152587890625, 0.8049545288085938, -1.51220703125, 0.66314697265625, -1.381744384765625, 0.3291015625, -0.5647430419921875, -1.321807861328125, 3.426788330078125, -0.7232131958007812, 1.81585693359375, 0.7637481689453125, -0.1373291015625, 0.1430511474609375, -0.329742431640625, -0.277862548828125, 0.5260009765625, -0.3290901184082031, 0.584442138671875, 0.37255859375, 0.29796600341796875, 0.4000282287597656, 0.03189849853515625, -0.346954345703125, 0.73828125, 0.65008544921875, -0.097259521484375, -1.1922607421875, -0.16021347045898438, -0.36907958984375, 0.9971084594726562, 1.23419189453125, 0.46478271484375, 1.93939208984375, 0.701934814453125, 0.22467041015625, 0.7584381103515625, 0.68585205078125, 0.3336181640625, 0.2315673828125, -0.41864013671875, 0.8884124755859375, -1.090057373046875, 0.50042724609375, 0.1059417724609375, -0.6476211547851562, 0.2386932373046875, -0.040496826171875, 0.223846435546875, 0.26653289794921875, 0.85113525390625, -0.26126861572265625, 0.6203517913818359, 0.640869140625, 0.058704376220703125, 0.6897125244140625, -0.4322357177734375, -0.4296875, -0.2159423828125, -0.46820068359375, 0.67132568359375, -0.5781784057617188, -0.27313232421875, 0.2905101776123047, -0.45703125, -0.14605712890625, -0.63653564453125, 0.3397216796875, 0.38047027587890625, -0.01287078857421875, -0.071929931640625, -0.44683837890625, 0.855255126953125, -1.39825439453125, -0.46441650390625, -0.3722381591796875, -1.1780853271484375, 0.2949981689453125, 0.814971923828125, 1.489044189453125, -1.0459747314453125, 1.3568115234375, -0.0806884765625, 0.691741943359375, -0.44415283203125, -1.0281829833984375, -0.67718505859375, 1.5901031494140625, -0.5037384033203125, 0.0731201171875, -1.281402587890625, -0.225555419921875, 0.2154541015625, 0.3680419921875, 1.075347900390625, 0.64129638671875, 0.6110687255859375, -0.6392898559570312, 0.6964569091796875, 0.30319976806640625, -1.149658203125, 0.04976654052734375, 0.1041412353515625, 4.3232421875, 0.48577880859375, 0.10433197021484375, 0.40057373046875, -0.9109649658203125, 1.7550506591796875, -0.219268798828125, 0.1605224609375, 0.5094985961914062, -1.16436767578125, 0.5083580017089844, 0.0, 0.1163177490234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000028.npy"}
{"epoch": 0.05863874345549738, "step": 29, "batch_size": 128, "mean": 0.19124989211559296, "std": 0.8794799447059631, "min": -2.01739501953125, "p10": -0.8680992126464843, "median": 0.1277618408203125, "p90": 1.3839691162109373, "max": 2.45819091796875, "pos_frac": 0.578125, "sample": [-0.1260547637939453, -0.729461669921875, -0.15576171875, 0.592498779296875, 1.478668212890625, -0.172454833984375, -0.5344772338867188, -2.01739501953125, 0.35107421875, 0.5635566711425781, 0.17071533203125, 0.019741058349609375, -0.0968780517578125, 0.842010498046875, -1.557342529296875, 0.482177734375, 0.26395416259765625, 0.757049560546875, 0.1380615234375, -0.083984375, -0.9194488525390625, 0.2605743408203125, -0.8460922241210938, 1.735137939453125, 0.506683349609375, -1.3751754760742188, 0.87823486328125, 0.34139251708984375, -1.07806396484375, -0.1356353759765625, 0.035797119140625, 1.8983154296875, 1.152435302734375, 0.6185302734375, -1.052825927734375, 1.45025634765625, 1.2159423828125, 0.117462158203125, 1.6376800537109375, -0.6035919189453125, -0.400543212890625, -0.51904296875, 0.05230712890625, 1.76031494140625, 0.312103271484375, 0.245574951171875, 0.6505889892578125, -0.3931884765625, 0.0, 0.32997894287109375, 0.2947998046875, -0.818756103515625, 1.0363006591796875, -0.5403938293457031, 0.9432220458984375, 0.25360107421875, -0.2624664306640625, 0.106109619140625, -0.031646728515625, 1.355560302734375, -0.124298095703125, 1.763885498046875, 2.45819091796875, 0.83172607421875, -1.42242431640625, 0.80010986328125, 2.055694580078125, 0.143280029296875, -2.00390625, -0.054515838623046875, 1.22894287109375, -0.1295928955078125, 0.4901885986328125, 0.4185791015625, -1.135223388671875, -0.3492431640625, -0.1429443359375, 1.1625823974609375, 0.972320556640625, -0.5747909545898438, 0.007720947265625, -1.0286865234375, 0.4486961364746094, 0.5865936279296875, -0.5198822021484375, -0.22126007080078125, 0.056304931640625, -0.232421875, 0.5481414794921875, -0.964111328125, 0.14115142822265625, 0.469482421875, 0.04286956787109375, 0.28008270263671875, 2.23101806640625, 0.6572799682617188, 0.864990234375, 1.1464691162109375, -0.4428882598876953, 0.0, -1.1397705078125, -1.9300193786621094, 0.0774993896484375, 1.103485107421875, -0.45154571533203125, -0.618408203125, 0.85687255859375, -0.339630126953125, -0.492919921875, -0.56976318359375, 1.86700439453125, 0.02197265625, 0.378021240234375, 0.18268775939941406, 1.109375, 0.51824951171875, -0.5701904296875, 1.326416015625, 0.40625, -0.1406402587890625, -0.41632843017578125, 0.84222412109375, 1.659454345703125, -0.0962982177734375, -0.76641845703125, -0.309814453125, 1.495697021484375, -0.381317138671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000029.npy"}
{"epoch": 0.060732984293193716, "step": 30, "batch_size": 128, "mean": 0.306654155254364, "std": 1.021482229232788, "min": -3.038665771484375, "p10": -0.641950225830078, "median": 0.1968250274658203, "p90": 1.42127685546875, "max": 4.400726318359375, "pos_frac": 0.625, "sample": [0.215057373046875, 0.5683441162109375, 1.5814208984375, 1.220428466796875, -0.34124755859375, -1.3046875, -0.4169044494628906, 1.0918121337890625, -0.417266845703125, 0.25872802734375, 0.11232757568359375, 0.0263214111328125, -0.07305145263671875, -1.183013916015625, 0.08585357666015625, -0.3062591552734375, -0.477386474609375, 0.08847808837890625, 0.811920166015625, 0.08831787109375, 0.37506103515625, 0.09489059448242188, 0.84136962890625, 1.44219970703125, -0.5896530151367188, 2.687744140625, 0.9515380859375, -0.285400390625, 1.419921875, 0.9462890625, 0.14856529235839844, -0.3958740234375, 0.09885787963867188, 2.12091064453125, 0.1022491455078125, -0.3124542236328125, -1.120697021484375, -0.5152664184570312, -0.170135498046875, 1.33526611328125, 0.9727783203125, 0.4541015625, 0.1694793701171875, -0.44242095947265625, 1.674407958984375, 0.5503997802734375, 3.337615966796875, 0.3131217956542969, 0.6150741577148438, 0.7607421875, -0.026670455932617188, 1.63140869140625, 1.13134765625, 0.2831268310546875, 0.6306610107421875, 0.7728424072265625, 0.2168140411376953, 0.570404052734375, 4.400726318359375, -0.2725372314453125, 0.4360504150390625, 0.5348052978515625, -0.27811241149902344, 0.025146484375, 0.714202880859375, 0.243194580078125, -0.2276153564453125, -0.439971923828125, 0.595672607421875, -0.8323593139648438, 0.3363037109375, 1.2796630859375, -0.01531982421875, -0.475738525390625, 1.79901123046875, 0.18456649780273438, -1.967437744140625, -0.11135482788085938, -0.58831787109375, 1.14306640625, 0.384246826171875, -0.15237045288085938, -0.76397705078125, 3.4564208984375, 1.4244384765625, 0.20908355712890625, -0.336151123046875, 1.0390625, -0.0867462158203125, 0.2695159912109375, -0.16355133056640625, -0.16058349609375, -1.058013916015625, -0.252655029296875, -0.37786865234375, -1.94097900390625, 2.122802734375, -0.859405517578125, 1.0442276000976562, -0.9827728271484375, 0.6861572265625, -3.038665771484375, -0.15355300903320312, 0.888275146484375, 0.7182579040527344, 0.01324462890625, 0.3023834228515625, -0.10052108764648438, 0.70794677734375, 0.626190185546875, 0.0258026123046875, -0.510772705078125, -0.5847549438476562, 0.787750244140625, 0.4841461181640625, 0.1658782958984375, 0.3646087646484375, -1.5137939453125, 1.150604248046875, -0.3772735595703125, 0.6044158935546875, 1.0780029296875, -1.044281005859375, 2.88677978515625, 0.07640266418457031, 0.6552200317382812, -0.255401611328125, 0.8905029296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000030.npy"}
{"epoch": 0.06282722513089005, "step": 31, "batch_size": 128, "mean": 0.19775360822677612, "std": 0.8427091240882874, "min": -1.863922119140625, "p10": -0.7890838623046875, "median": 0.13528728485107422, "p90": 1.10284423828125, "max": 2.75909423828125, "pos_frac": 0.5859375, "sample": [0.4963226318359375, -0.139923095703125, 0.96649169921875, -0.326812744140625, 0.6072044372558594, -0.2568511962890625, 0.41375732421875, -0.6556396484375, 0.5821037292480469, 0.2656135559082031, 1.0846328735351562, 0.751373291015625, 0.318695068359375, 0.0792999267578125, -1.858306884765625, -1.2637176513671875, 0.2974700927734375, 1.354034423828125, 1.7930755615234375, -1.848846435546875, -1.45489501953125, -0.808502197265625, 1.2340850830078125, 0.0, 0.3308296203613281, 0.33249664306640625, 1.799468994140625, -0.29271507263183594, -0.17626190185546875, -1.863922119140625, 0.03955078125, 0.131591796875, -0.28549957275390625, 1.0907745361328125, -0.905120849609375, -0.05340576171875, -0.2404327392578125, 0.4283599853515625, -0.353424072265625, 0.0, -0.31134033203125, -1.474334716796875, 0.2848968505859375, -0.16815185546875, 0.9777374267578125, 0.57476806640625, 0.054706573486328125, 0.10137176513671875, 1.07025146484375, 0.9164333343505859, 0.7075653076171875, 1.722198486328125, 0.2127685546875, 2.75909423828125, -0.6770477294921875, 0.1407318115234375, -0.4298095703125, 0.3408050537109375, -0.080657958984375, 0.47174072265625, 0.00275421142578125, 0.606842041015625, 0.92181396484375, 1.41668701171875, 0.724517822265625, 0.75384521484375, 1.015869140625, -0.2558441162109375, 0.0059814453125, 0.713226318359375, 0.0, -1.049407958984375, 0.25319671630859375, -0.46484375, -0.256011962890625, 0.4179840087890625, 1.8648681640625, 0.199432373046875, 0.880584716796875, 0.907012939453125, -0.38217926025390625, 0.7069473266601562, 0.581451416015625, 1.0562667846679688, -0.16388702392578125, -0.78076171875, -0.9138641357421875, -0.39693260192871094, 1.2433319091796875, 1.12890625, -0.3012847900390625, 0.33956336975097656, -0.463104248046875, -0.43756103515625, 0.13071250915527344, -0.4541015625, 0.733184814453125, -0.4356689453125, 0.03472900390625, 0.767822265625, 0.08809661865234375, 0.0563201904296875, 0.7235107421875, 0.2188568115234375, -0.8144073486328125, 0.5467529296875, 0.3573417663574219, -0.1255035400390625, 0.0, 0.325927734375, 1.007965087890625, -0.619659423828125, -0.3480987548828125, -0.16662979125976562, 2.5803375244140625, -1.53082275390625, 1.197540283203125, -0.2197418212890625, 1.0916748046875, 0.13898277282714844, 2.487579345703125, -0.214691162109375, -0.403045654296875, -0.0512542724609375, -0.6485862731933594, 0.601715087890625, -1.3100433349609375, 0.8835830688476562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000031.npy"}
{"epoch": 0.06492146596858639, "step": 32, "batch_size": 128, "mean": 0.36048924922943115, "std": 0.9930692315101624, "min": -2.5118408203125, "p10": -0.7264228820800781, "median": 0.34806060791015625, "p90": 1.5402465820312499, "max": 3.273193359375, "pos_frac": 0.640625, "sample": [0.7330322265625, 0.0029468536376953125, -0.8145751953125, 0.525482177734375, -0.1340484619140625, -2.387847900390625, 0.65557861328125, 0.85015869140625, 2.26934814453125, 1.05816650390625, 0.828887939453125, 0.581085205078125, -0.3674774169921875, -0.5033111572265625, 1.12591552734375, 1.07440185546875, 0.0860443115234375, -0.813201904296875, -0.5786590576171875, -0.54974365234375, 0.402008056640625, 0.3582763671875, 1.193450927734375, -1.1469268798828125, 0.327301025390625, 1.521331787109375, -0.18059158325195312, 0.1284198760986328, 0.117431640625, 1.24420166015625, 1.225921630859375, 1.66082763671875, -0.5241851806640625, -0.678558349609375, -0.26448822021484375, -0.1942901611328125, -0.552764892578125, 1.2398681640625, 1.432342529296875, 0.305419921875, -0.39791107177734375, 1.1494140625, -0.6051177978515625, 0.1348114013671875, 0.18707275390625, 0.9058837890625, 0.60498046875, -0.732635498046875, 0.1859130859375, -0.0843505859375, 0.681060791015625, 0.793853759765625, -0.481109619140625, 3.273193359375, 2.524078369140625, 0.5725765228271484, -1.3154296875, 1.48809814453125, 0.611663818359375, 1.362579345703125, 0.80859375, 1.18359375, 1.584381103515625, -0.622222900390625, -0.021026611328125, -0.047454833984375, -0.7151947021484375, 0.3045654296875, 0.4665069580078125, 0.003082275390625, -1.1737060546875, -0.3995208740234375, 1.01861572265625, 1.94671630859375, 0.5862808227539062, -0.015594482421875, 0.3946533203125, 2.88409423828125, 1.9278564453125, 0.111328125, 0.40160369873046875, 0.405548095703125, 1.002471923828125, 0.395721435546875, -2.5118408203125, -0.1648101806640625, -0.3372993469238281, -0.7288742065429688, -0.3100128173828125, -1.28729248046875, -0.4691314697265625, 0.025997161865234375, 1.137542724609375, 0.24016571044921875, -0.9130859375, 1.650177001953125, -0.594207763671875, 1.0792617797851562, 1.512298583984375, -0.09438323974609375, 0.5186309814453125, 1.31292724609375, 0.8198699951171875, 0.03459930419921875, 0.0350341796875, 1.3377914428710938, -0.1143646240234375, 0.14521026611328125, 0.71270751953125, 0.3958892822265625, 0.533050537109375, 2.184417724609375, 0.3378448486328125, -1.5582275390625, 0.48569488525390625, 1.72540283203125, -0.0519866943359375, 0.8668594360351562, 0.495635986328125, 0.9362869262695312, 2.0360870361328125, -0.21356201171875, -1.714752197265625, -0.327056884765625, -0.666168212890625, -0.725372314453125, 1.142333984375, 2.6766357421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000032.npy"}
{"epoch": 0.06701570680628273, "step": 33, "batch_size": 128, "mean": 0.3831309676170349, "std": 1.0761810541152954, "min": -2.4735107421875, "p10": -0.9625350952148437, "median": 0.42116546630859375, "p90": 1.6087081909179688, "max": 3.82501220703125, "pos_frac": 0.625, "sample": [0.5284614562988281, 1.6322021484375, 0.58251953125, -1.029266357421875, 0.739837646484375, -2.2298583984375, 0.8052444458007812, 1.612457275390625, 0.3093719482421875, 2.07177734375, 0.3148040771484375, -1.68511962890625, -0.1873626708984375, -0.5461273193359375, 0.782257080078125, -0.6926727294921875, 0.05754280090332031, -1.231964111328125, -0.1757221221923828, -0.34529876708984375, 0.818817138671875, 0.883453369140625, -0.850341796875, 1.6771240234375, 1.261383056640625, -1.066986083984375, 2.3790283203125, 0.0156402587890625, 2.29376220703125, 1.049713134765625, 0.447479248046875, 0.68316650390625, -0.07823944091796875, 0.5645904541015625, -0.7102127075195312, 1.50048828125, 1.399871826171875, 1.5106964111328125, -1.870819091796875, 1.132080078125, -0.41351318359375, 0.89288330078125, 0.2361431121826172, 0.05548095703125, -0.060302734375, 0.0, -0.044647216796875, -0.06232452392578125, -1.520751953125, 0.9677581787109375, 1.5515899658203125, -0.21086883544921875, -0.32904052734375, 0.16497421264648438, 1.319854736328125, 0.657196044921875, 0.904876708984375, 0.5339202880859375, 1.42840576171875, -0.428619384765625, -0.3014984130859375, 1.3684158325195312, -0.0108642578125, -0.28107452392578125, 0.864288330078125, 0.6895828247070312, 1.52862548828125, 0.19153594970703125, 0.7111358642578125, 0.6348152160644531, 0.03279876708984375, 0.9598388671875, -0.1431884765625, -1.05938720703125, -0.062530517578125, 0.6006317138671875, -0.04169654846191406, -0.683349609375, 1.1147003173828125, -0.36798095703125, -1.7052001953125, 0.99517822265625, -0.308868408203125, -0.1617431640625, -0.566314697265625, 0.6339263916015625, 0.3948516845703125, 2.130218505859375, 1.934967041015625, 0.508392333984375, 0.936279296875, 0.5513153076171875, 0.977630615234375, 0.7656173706054688, 1.701416015625, -0.3837127685546875, 3.0711669921875, 0.23968505859375, 0.0, 1.343597412109375, -2.4735107421875, -0.013092041015625, 2.989898681640625, -0.071929931640625, 1.5745849609375, 0.590087890625, -0.9259033203125, -0.975128173828125, 0.043060302734375, -2.2406005859375, 0.3811302185058594, 0.47931671142578125, -1.038299560546875, 1.44964599609375, 0.16869354248046875, 2.2147216796875, 1.6071014404296875, 0.663330078125, 0.059295654296875, -0.9571380615234375, 1.5022430419921875, -0.2692146301269531, 0.5635147094726562, 1.20123291015625, 0.720458984375, 0.115478515625, -0.9432220458984375, 3.82501220703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000033.npy"}
{"epoch": 0.06910994764397906, "step": 34, "batch_size": 128, "mean": 0.6717020273208618, "std": 1.2222994565963745, "min": -1.97247314453125, "p10": -0.6306667327880858, "median": 0.5151443481445312, "p90": 2.26529541015625, "max": 3.83038330078125, "pos_frac": 0.6875, "sample": [0.4551239013671875, -1.2632598876953125, -1.6722412109375, 0.5985565185546875, 0.547027587890625, -0.40468597412109375, 0.697418212890625, 1.214813232421875, 2.57275390625, 0.08570098876953125, 0.1918811798095703, 0.25310516357421875, 0.0045166015625, -0.4048919677734375, 0.37787628173828125, -0.3446044921875, 3.723297119140625, 1.949951171875, 1.1129302978515625, -0.04427909851074219, 0.23431396484375, -0.5834197998046875, 0.5313720703125, -0.22740554809570312, 3.333099365234375, 3.422149658203125, 2.3548583984375, 0.439849853515625, -0.420562744140625, -0.147491455078125, 0.62432861328125, 0.48830413818359375, -0.153411865234375, 1.929595947265625, -1.97247314453125, 0.9661483764648438, 0.46441650390625, -0.3091278076171875, -1.50921630859375, -0.09075927734375, 0.07242584228515625, 0.7618408203125, 1.282470703125, 0.77728271484375, 0.2918109893798828, 1.8099365234375, 2.0670166015625, -0.30377197265625, 1.6797866821289062, 1.0277099609375, 0.688995361328125, 0.743743896484375, -1.351776123046875, 1.72869873046875, 0.385009765625, 2.0020751953125, 0.758575439453125, 1.531219482421875, 1.1656341552734375, 1.409149169921875, -0.154571533203125, 1.478179931640625, -1.58270263671875, 0.6718597412109375, 0.167022705078125, 1.71893310546875, -0.338653564453125, -0.1048583984375, -0.973724365234375, -0.61285400390625, 0.38909912109375, 1.94866943359375, -0.93060302734375, 2.32135009765625, 2.50518798828125, -0.6722297668457031, -0.01789093017578125, 1.7850341796875, 2.5009765625, 2.6397705078125, 0.354095458984375, 0.9146881103515625, 3.83038330078125, -0.819122314453125, 3.70648193359375, -0.2219867706298828, 0.6481475830078125, 0.26148223876953125, -0.313140869140625, 2.09710693359375, 0.990814208984375, -1.51043701171875, -0.191131591796875, 1.71588134765625, 1.2856788635253906, 2.24127197265625, 3.587799072265625, -0.2579345703125, 0.9891204833984375, 1.0986328125, 1.902587890625, -0.4144439697265625, -0.598419189453125, 1.484588623046875, -0.03533363342285156, -1.39599609375, 1.3445281982421875, 0.0154266357421875, 1.707000732421875, 0.3551826477050781, 0.5246124267578125, -1.035858154296875, 0.5559120178222656, 0.3282623291015625, 0.08858871459960938, -0.2707328796386719, 0.6126556396484375, 0.987884521484375, -0.27685546875, -0.334381103515625, 3.44970703125, 0.109100341796875, 1.86956787109375, 0.792236328125, 2.03216552734375, 0.66693115234375, 0.30804443359375, 0.50567626953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000034.npy"}
{"epoch": 0.0712041884816754, "step": 35, "batch_size": 128, "mean": 0.41681650280952454, "std": 1.3348251581192017, "min": -5.6175537109375, "p10": -0.7354553222656249, "median": 0.24811172485351562, "p90": 2.02764892578125, "max": 5.172332763671875, "pos_frac": 0.6328125, "sample": [-0.09615325927734375, 1.471435546875, -1.263153076171875, -0.7216796875, -0.4334564208984375, -0.0923004150390625, 2.308197021484375, 2.34375, -0.348480224609375, -1.516357421875, -0.1773223876953125, -0.3174152374267578, 1.5619964599609375, -0.7042236328125, 0.669708251953125, -0.07390594482421875, -0.669677734375, 0.67108154296875, -0.2487335205078125, 0.18805885314941406, -0.577880859375, -5.6175537109375, 1.3846588134765625, 0.357818603515625, 0.7320823669433594, 0.40362548828125, 2.0784912109375, 0.1443939208984375, -1.174835205078125, -0.2962646484375, 1.4458580017089844, 1.09967041015625, -0.34444427490234375, 5.172332763671875, -0.5963134765625, 1.4550628662109375, 0.8889083862304688, -1.2339935302734375, 0.5855598449707031, 0.0, -0.39427947998046875, 1.26593017578125, 0.006866455078125, 0.704437255859375, 3.797821044921875, -0.683868408203125, 3.553802490234375, 0.13275909423828125, -0.197784423828125, 0.8675365447998047, 0.9582061767578125, -1.2978515625, -1.677032470703125, 2.005859375, 0.0, -0.94720458984375, 1.1334304809570312, 0.8734054565429688, 0.016065597534179688, 0.9529266357421875, 0.4331474304199219, 1.91815185546875, -1.0532379150390625, 0.704620361328125, -2.200653076171875, 0.21868896484375, -0.72503662109375, 0.22354888916015625, -0.5051116943359375, 1.4964599609375, 0.1558380126953125, 0.894927978515625, -0.39144134521484375, 0.181976318359375, 0.5924072265625, -0.671417236328125, 2.583892822265625, 0.209197998046875, 1.4832839965820312, 0.1267986297607422, 0.272674560546875, 0.82891845703125, 0.2900409698486328, 0.387176513671875, 1.6342315673828125, 0.927764892578125, -0.418792724609375, 0.340972900390625, -3.185211181640625, -0.07950592041015625, 1.363922119140625, 1.1891860961914062, 0.068634033203125, 1.35369873046875, 0.1535797119140625, 0.840423583984375, -0.18719482421875, -0.759765625, 0.3294677734375, 0.6925811767578125, 0.0426177978515625, -0.446685791015625, 2.788909912109375, -1.9490509033203125, -0.6003570556640625, 0.38372802734375, 0.05196380615234375, -0.5152511596679688, 2.23687744140625, 2.52264404296875, 2.763702392578125, 1.7835311889648438, 0.49643707275390625, 1.393524169921875, 3.197052001953125, -0.0006103515625, 0.101287841796875, 0.18331146240234375, 2.288116455078125, -0.624542236328125, 0.29825592041015625, 0.708251953125, 0.5044269561767578, -0.149658203125, 1.4751739501953125, 1.6927490234375, -0.361297607421875, 1.8389892578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000035.npy"}
{"epoch": 0.07329842931937172, "step": 36, "batch_size": 128, "mean": 0.6127804517745972, "std": 1.4980921745300293, "min": -2.99884033203125, "p10": -1.1262313842773435, "median": 0.4887199401855469, "p90": 2.6377281188964843, "max": 5.630279541015625, "pos_frac": 0.640625, "sample": [0.6231689453125, 1.269012451171875, 1.5479736328125, 0.4718780517578125, -0.300537109375, 0.9434967041015625, -1.444427490234375, 1.9981689453125, -0.9830245971679688, 1.281646728515625, 2.3309326171875, 2.6503372192382812, 1.1950225830078125, 1.2781982421875, -0.50054931640625, 2.1232452392578125, 2.406768798828125, 2.56005859375, 1.3561134338378906, -0.212615966796875, 0.18020057678222656, -0.063140869140625, -0.15631103515625, 0.5234222412109375, -2.30523681640625, 1.5149078369140625, -0.2138671875, -0.6063232421875, -0.0196380615234375, 2.63232421875, -0.184173583984375, -0.6650772094726562, 1.068267822265625, 0.462310791015625, -0.1993408203125, 0.8242645263671875, 0.49217987060546875, 1.0085601806640625, -0.046611785888671875, 2.7139739990234375, 1.2196426391601562, 0.295654296875, -1.7159461975097656, 0.0, 0.029815673828125, 0.6360626220703125, -0.4220733642578125, -0.5071792602539062, 0.9718093872070312, -0.2357330322265625, 2.820526123046875, 1.10638427734375, -2.9779052734375, 2.087249755859375, 0.495330810546875, 0.8410797119140625, -2.18798828125, 0.09062957763671875, -2.51641845703125, -0.3941497802734375, 4.17230224609375, 3.175567626953125, 1.40423583984375, 1.076873779296875, 0.965240478515625, 0.051296234130859375, 1.961334228515625, 0.2167816162109375, 5.630279541015625, 0.0096435546875, -0.4371490478515625, 0.989898681640625, 0.15924072265625, 0.21923828125, -0.82574462890625, 0.535186767578125, -1.4737548828125, 0.4074554443359375, 1.520599365234375, 3.1458740234375, -2.99884033203125, -1.055877685546875, -1.723480224609375, 0.5659255981445312, 0.416046142578125, 1.1997756958007812, 1.9339599609375, 3.66162109375, 0.541473388671875, -1.0347442626953125, 2.7562255859375, 3.015625, 1.570831298828125, 4.677001953125, 0.8141326904296875, -0.2517814636230469, 2.56890869140625, -0.048828125, -0.8675384521484375, 1.1016731262207031, -0.0176239013671875, 1.53350830078125, 0.34719085693359375, 1.134796142578125, -0.91314697265625, -0.13249588012695312, 2.7032470703125, 2.1499786376953125, 0.0, -0.111968994140625, 2.89019775390625, 0.933746337890625, 0.323577880859375, 2.61407470703125, -2.10211181640625, 0.485260009765625, -1.873687744140625, 1.39215087890625, 0.29958343505859375, 0.110321044921875, 0.5822296142578125, -0.09259796142578125, -1.610595703125, 1.23333740234375, -1.2903900146484375, -0.22801971435546875, -0.3158416748046875, 1.4523162841796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000036.npy"}
{"epoch": 0.07539267015706806, "step": 37, "batch_size": 128, "mean": 0.4983428716659546, "std": 1.5664998292922974, "min": -3.332733154296875, "p10": -1.6595184326171875, "median": 0.61083984375, "p90": 2.5121826171875, "max": 4.792205810546875, "pos_frac": 0.640625, "sample": [0.6552963256835938, 1.38897705078125, 1.521240234375, 0.488067626953125, 0.7213459014892578, -2.5001220703125, 1.66107177734375, 1.142242431640625, 1.2763214111328125, -0.13982772827148438, 1.095611572265625, -0.6511688232421875, -1.031890869140625, -2.82928466796875, -0.69158935546875, -0.0662689208984375, -3.332733154296875, 1.1708602905273438, 1.9749755859375, 0.7344970703125, 0.42626953125, 1.0016326904296875, 0.7779159545898438, -1.41326904296875, -0.48250579833984375, 3.10711669921875, 0.6285247802734375, -0.262298583984375, 2.9251708984375, -3.008636474609375, 2.551788330078125, 0.390899658203125, 1.688018798828125, -0.8265399932861328, 4.792205810546875, 0.05072593688964844, -0.51422119140625, -1.64599609375, 1.4031982421875, 1.34039306640625, 0.9416351318359375, 1.63031005859375, -0.1716766357421875, 0.30377197265625, 0.772918701171875, 2.693603515625, -1.292572021484375, -0.19610595703125, 2.293212890625, -1.451416015625, 1.519622802734375, 0.4637908935546875, -0.08050537109375, 0.16053390502929688, -1.987640380859375, -0.613677978515625, 2.3003387451171875, 1.0455322265625, 0.00433349609375, 0.23870849609375, 2.79241943359375, -2.8008880615234375, -0.20847320556640625, -1.9629974365234375, 0.7703857421875, -2.447540283203125, 4.46148681640625, -0.16945266723632812, 0.2948493957519531, -0.44091796875, -0.5339584350585938, 1.987945556640625, -2.737579345703125, -0.408966064453125, 0.6328048706054688, 1.295013427734375, 0.107086181640625, 1.82550048828125, 1.20965576171875, -1.45703125, 2.653717041015625, 1.5644683837890625, 0.6510772705078125, -0.299652099609375, 0.22541046142578125, 2.495208740234375, -0.5228271484375, 1.2307586669921875, 0.50543212890625, 2.081787109375, -1.975616455078125, 2.015869140625, 1.166015625, -2.649078369140625, 2.719482421875, 0.8235015869140625, -1.8880615234375, 2.2678680419921875, 3.328857421875, 3.11346435546875, 1.3577423095703125, 1.4757080078125, 0.26223182678222656, -0.22966766357421875, 0.15956878662109375, -1.1644744873046875, 1.760101318359375, -0.3505401611328125, 0.837890625, 0.60711669921875, 1.65826416015625, 1.06805419921875, 0.6514053344726562, 4.45257568359375, 0.78363037109375, -1.691070556640625, 1.4274749755859375, -0.15946388244628906, 1.5647125244140625, -0.9742431640625, -0.14715576171875, -0.05120849609375, 0.6587371826171875, 0.20242691040039062, 0.61456298828125, 0.5785675048828125, -0.0064697265625, 2.63165283203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000037.npy"}
{"epoch": 0.0774869109947644, "step": 38, "batch_size": 128, "mean": 0.7030527591705322, "std": 1.78289794921875, "min": -4.68780517578125, "p10": -1.22296142578125, "median": 0.5260467529296875, "p90": 2.9639572143554678, "max": 5.52569580078125, "pos_frac": 0.6171875, "sample": [1.6673583984375, 2.4677352905273438, 0.06051063537597656, -0.8991470336914062, -0.73345947265625, 1.66949462890625, 1.191009521484375, 2.618011474609375, 0.3664093017578125, -0.3482208251953125, 2.039642333984375, -1.682647705078125, 0.7056884765625, -0.604827880859375, 0.086029052734375, 0.5491714477539062, 2.13201904296875, 0.05457496643066406, 0.6575927734375, -4.39556884765625, 2.0180206298828125, -1.2161865234375, 1.811920166015625, -1.41802978515625, -0.02706146240234375, -0.8685302734375, 1.7042236328125, 2.655029296875, 1.69232177734375, 1.169769287109375, 0.86114501953125, 1.4343109130859375, -1.1863021850585938, 0.9710121154785156, -0.582672119140625, 4.0457763671875, 2.52459716796875, 0.12885665893554688, -0.469390869140625, -0.30035400390625, 5.52569580078125, -1.4644775390625, -0.18188858032226562, -0.6715240478515625, -0.94171142578125, 1.5914077758789062, -0.5664424896240234, -1.9390029907226562, 0.130584716796875, -1.57110595703125, 2.8560791015625, 1.65618896484375, -1.3987274169921875, 0.0, -1.03076171875, -1.23876953125, 0.5039443969726562, 3.391143798828125, 4.676849365234375, 3.2080535888671875, 0.32330322265625, -0.137542724609375, 0.5481491088867188, 4.71160888671875, 3.60467529296875, 0.90625, -0.348236083984375, -0.9417572021484375, 0.9898529052734375, 0.11600875854492188, 4.25067138671875, -1.993682861328125, -0.00223541259765625, 0.142974853515625, -0.7176971435546875, -0.134002685546875, -1.60223388671875, -0.15988922119140625, 0.454864501953125, 0.66455078125, 0.64215087890625, -0.3123779296875, -2.6739044189453125, 0.71343994140625, 0.36004638671875, 2.613922119140625, 4.551544189453125, -0.5839462280273438, 2.1129150390625, -0.21681594848632812, 0.724761962890625, -0.8092498779296875, 0.142333984375, 1.5273284912109375, 1.1109619140625, 0.3738555908203125, -0.13436126708984375, 2.19403076171875, -0.19708251953125, -0.6217041015625, -2.663604736328125, 0.7724151611328125, 2.713104248046875, 2.083770751953125, 1.6014251708984375, -0.2074718475341797, -0.006805419921875, 1.199737548828125, 0.818878173828125, -0.03546142578125, 3.565093994140625, 0.8084259033203125, 4.399871826171875, 1.0292892456054688, 2.708221435546875, -4.68780517578125, 2.84307861328125, 3.740142822265625, 4.226104736328125, 0.21630859375, 2.859344482421875, 0.964813232421875, 1.955780029296875, 1.4228248596191406, -0.18426513671875, 0.7693634033203125, -0.9228992462158203, 0.7222213745117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000038.npy"}
{"epoch": 0.07958115183246073, "step": 39, "batch_size": 128, "mean": 1.3699138164520264, "std": 1.885894775390625, "min": -2.6332244873046875, "p10": -0.6226249694824219, "median": 1.0410919189453125, "p90": 3.5535186767578124, "max": 8.267608642578125, "pos_frac": 0.75, "sample": [1.178558349609375, 1.9102783203125, 1.613555908203125, 0.6377105712890625, -0.8717041015625, -0.8229522705078125, 4.934356689453125, 0.446380615234375, -0.6233673095703125, 3.257354736328125, 0.587615966796875, 1.558013916015625, 2.6223297119140625, 4.6798095703125, 2.575958251953125, 0.1822662353515625, 0.1114654541015625, 2.775482177734375, -0.2290191650390625, -0.31453704833984375, 1.0788002014160156, -1.7682647705078125, 3.54107666015625, 2.96917724609375, 2.6461181640625, -0.3400535583496094, 2.8720703125, 1.777984619140625, 0.15009689331054688, 1.9477577209472656, 1.8524169921875, 0.048526763916015625, 0.474334716796875, 1.7841339111328125, -0.588470458984375, 0.13677978515625, 0.1920948028564453, 0.8331680297851562, 1.032989501953125, -0.17324066162109375, -0.1352081298828125, 0.206573486328125, -1.08251953125, 2.020965576171875, 3.5231475830078125, 2.194915771484375, 0.9030914306640625, -0.1888580322265625, 1.015289306640625, -0.6300811767578125, 2.90826416015625, 0.68939208984375, 0.2034759521484375, 0.465362548828125, 8.267608642578125, -0.8365020751953125, -0.302734375, 1.6178741455078125, 6.252655029296875, 5.90704345703125, 3.3870849609375, 3.582550048828125, 3.6789093017578125, -0.1604442596435547, -1.563385009765625, 1.9625244140625, 0.283447265625, 2.8264617919921875, 1.552001953125, 1.9864253997802734, 2.6439971923828125, 1.0495624542236328, 5.072235107421875, 1.9878387451171875, 1.3138923645019531, -0.9600982666015625, -0.152587890625, 0.8188629150390625, -0.28912353515625, 1.337890625, 0.2840557098388672, 2.278076171875, 0.9439697265625, 1.0491943359375, 3.323211669921875, 2.9538116455078125, 1.49615478515625, 0.229400634765625, 3.2972412109375, 1.0144271850585938, 0.6091136932373047, -0.5471649169921875, 2.5979385375976562, 1.078582763671875, 1.1064300537109375, 5.30963134765625, 5.469879150390625, 3.191741943359375, 2.819915771484375, 2.1861572265625, 0.7814178466796875, 5.744903564453125, 3.8624267578125, -0.011785507202148438, 1.720001220703125, 0.95819091796875, -0.213592529296875, -0.077117919921875, -0.09713363647460938, 0.2340545654296875, -2.6332244873046875, 2.624420166015625, 1.7991943359375, -1.718231201171875, 2.0052413940429688, 2.541778564453125, 6.196624755859375, -1.37689208984375, 0.978424072265625, 1.4894027709960938, 0.051685333251953125, -0.6223068237304688, 0.0, -0.2093353271484375, -1.7578125, 0.3195037841796875, 0.0850677490234375, 1.9474334716796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000039.npy"}
{"epoch": 0.08167539267015707, "step": 40, "batch_size": 128, "mean": 0.7303168177604675, "std": 1.8872805833816528, "min": -3.61126708984375, "p10": -1.1448600769042967, "median": 0.4350700378417969, "p90": 3.145852661132812, "max": 9.88250732421875, "pos_frac": 0.625, "sample": [-0.12843894958496094, -0.8494873046875, 0.06784820556640625, 1.28363037109375, -0.659332275390625, 2.9235076904296875, -0.405975341796875, 0.2003173828125, -0.7849502563476562, 2.52044677734375, -0.25159263610839844, 1.4443893432617188, 2.669830322265625, 1.0478057861328125, -0.609344482421875, -1.994140625, 0.84637451171875, 0.79644775390625, 0.91375732421875, 1.8090362548828125, 4.10601806640625, -1.3333816528320312, 1.240753173828125, 0.44382476806640625, -1.053741455078125, 0.12432861328125, -0.8500900268554688, 0.10406875610351562, 0.8947906494140625, 0.3980064392089844, 0.975677490234375, -2.765045166015625, -0.772430419921875, 0.628143310546875, -1.68341064453125, 0.418853759765625, 0.5552978515625, -0.9686279296875, 4.5, -0.4853515625, -1.39959716796875, 3.9744873046875, -0.25617408752441406, 0.6361656188964844, 1.534942626953125, 1.610321044921875, 0.47064208984375, 4.06060791015625, 2.1624755859375, 0.7785625457763672, -0.5262069702148438, 0.31549835205078125, 1.0380859375, 1.0045166015625, -0.80194091796875, -1.7671279907226562, -0.93194580078125, 0.7767658233642578, -1.4319610595703125, 3.809326171875, -0.4683074951171875, 0.083160400390625, 3.10675048828125, 3.237091064453125, -0.07773971557617188, 1.94317626953125, 2.4442596435546875, -0.7065353393554688, 9.88250732421875, 1.34405517578125, 1.4820556640625, 6.44525146484375, 1.7206039428710938, 4.564971923828125, -1.12445068359375, -0.1784515380859375, 1.6136474609375, 1.4261322021484375, -0.30206298828125, 0.4263153076171875, 0.4078826904296875, 1.62152099609375, -0.08807373046875, 1.4016265869140625, -0.819366455078125, -0.520599365234375, -1.0275421142578125, -1.0089035034179688, -0.46306610107421875, 0.26812744140625, 1.010009765625, 1.0379638671875, -0.1194610595703125, -0.05316162109375, 1.129791259765625, -0.11029052734375, 4.15118408203125, 3.092529296875, 0.7218017578125, 2.8586997985839844, -1.9888916015625, -0.5070648193359375, -1.4235687255859375, 1.8083038330078125, -0.2565765380859375, 3.856475830078125, -0.30812835693359375, 2.210540771484375, 0.781219482421875, 0.15838623046875, 0.817657470703125, -3.61126708984375, 2.066802978515625, 0.0034332275390625, -2.0047607421875, 4.0238037109375, -1.1924819946289062, 0.881866455078125, -2.847412109375, 0.1816864013671875, 3.52886962890625, 2.44677734375, 2.78009033203125, 0.74462890625, 0.00177764892578125, 0.3306121826171875, -0.925994873046875, 1.195404052734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000040.npy"}
{"epoch": 0.08376963350785341, "step": 41, "batch_size": 128, "mean": 0.8373294472694397, "std": 1.7566362619400024, "min": -4.75103759765625, "p10": -1.3312965393066407, "median": 0.6996040344238281, "p90": 3.144819641113281, "max": 6.3759765625, "pos_frac": 0.6640625, "sample": [2.54254150390625, 0.184967041015625, 2.60015869140625, -1.0385589599609375, 0.7164840698242188, 5.272064208984375, -1.837554931640625, -0.818695068359375, 0.7298583984375, -0.468353271484375, 3.879150390625, 3.66009521484375, 0.916656494140625, 3.85943603515625, 1.531036376953125, -0.66058349609375, -1.3514404296875, -1.23345947265625, -1.0342864990234375, 1.606201171875, -1.004974365234375, 3.1566009521484375, -0.2111492156982422, 1.4632110595703125, 1.6964263916015625, 0.6363868713378906, 0.35186004638671875, 0.792633056640625, 0.237548828125, -1.5211181640625, 3.1397705078125, -0.261138916015625, 2.535491943359375, -0.534576416015625, 2.13922119140625, 0.2262420654296875, 4.8338623046875, 4.51666259765625, -0.8752288818359375, 1.381011962890625, 0.164886474609375, 3.2939453125, 2.36932373046875, -0.1621551513671875, 0.4398193359375, -0.9379425048828125, 3.944366455078125, -0.6781005859375, 0.0, 1.64947509765625, -0.06775665283203125, 0.59716796875, 6.3759765625, -1.8509521484375, 1.45458984375, 1.17657470703125, 1.0498046875, 0.423492431640625, 2.6580429077148438, 2.44903564453125, 2.19219970703125, -1.7216796875, -0.08384323120117188, -0.21575927734375, 0.995269775390625, -0.47434234619140625, 0.6030960083007812, 1.522705078125, 4.3660888671875, 0.21474456787109375, 0.7941436767578125, 2.7193603515625, -2.4876708984375, 1.1250152587890625, 2.0391845703125, 2.14739990234375, -1.80987548828125, 1.936279296875, -0.257232666015625, -0.2755126953125, 0.1153717041015625, -0.2392578125, -0.433624267578125, -1.765380859375, 0.6827239990234375, 0.43366241455078125, -0.380889892578125, -1.3279342651367188, 1.202423095703125, 1.04827880859375, 0.628753662109375, 2.514129638671875, -0.1630859375, 1.629638671875, 1.60150146484375, 0.272857666015625, 0.461578369140625, -0.45477294921875, -1.339141845703125, 1.7694091796875, 0.86376953125, 0.532012939453125, 4.39752197265625, -2.2216262817382812, -0.6578826904296875, 1.704904556274414, 1.9053421020507812, -4.75103759765625, 1.0739593505859375, 0.2566375732421875, 0.7185821533203125, 0.28839111328125, 3.1630325317382812, -1.3398895263671875, -0.760711669921875, 1.556243896484375, -1.895721435546875, 0.726165771484375, 1.8117256164550781, 2.9154052734375, 1.7997016906738281, -0.4228668212890625, 1.626251220703125, 2.08856201171875, 1.2240753173828125, 1.8797607421875, 2.4980010986328125, 0.50799560546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000041.npy"}
{"epoch": 0.08586387434554973, "step": 42, "batch_size": 128, "mean": 0.9899286031723022, "std": 2.423074960708618, "min": -7.36419677734375, "p10": -1.3472133636474608, "median": 0.6667861938476562, "p90": 4.376300048828124, "max": 9.1805419921875, "pos_frac": 0.6796875, "sample": [0.2068328857421875, 2.100189208984375, 6.179962158203125, -0.38934326171875, 2.528564453125, 0.7330322265625, 5.488006591796875, 1.717620849609375, 2.243743896484375, 0.30908203125, -1.3810272216796875, 4.874359130859375, 1.564453125, 0.6336212158203125, 1.6059341430664062, 1.823984146118164, 0.6590423583984375, 5.688720703125, -0.226226806640625, 0.24200439453125, -3.909210205078125, 9.1805419921875, -0.3770904541015625, -0.482696533203125, 0.8180694580078125, 1.70635986328125, 0.28558349609375, 1.8184814453125, 0.272308349609375, -0.37114715576171875, 0.317535400390625, 4.476531982421875, 0.741363525390625, 5.92999267578125, 2.2828826904296875, 2.577728271484375, 0.25128173828125, 2.1012496948242188, 0.8422088623046875, -0.09454345703125, -0.12068939208984375, -0.1925811767578125, 0.1551361083984375, 1.962310791015625, 2.1671066284179688, 0.7258720397949219, 0.2413330078125, -0.17101287841796875, -1.3327217102050781, 1.4316730499267578, 1.723236083984375, 0.2575492858886719, 3.4090728759765625, 0.35466766357421875, 0.0, 3.8226318359375, 3.766082763671875, 1.6905975341796875, 4.287078857421875, 1.646514892578125, 2.414764404296875, -0.3107452392578125, 1.088897705078125, 1.7803192138671875, 2.11090087890625, 0.20270919799804688, 1.07415771484375, 3.957244873046875, -0.6544647216796875, -0.8249359130859375, 1.86572265625, 0.49835205078125, 5.465576171875, -0.350982666015625, 0.069610595703125, -0.23984336853027344, 1.3205718994140625, -1.863189697265625, -1.947265625, 2.024066925048828, 5.54217529296875, 6.087646484375, -2.2977294921875, 0.06748580932617188, 1.1845703125, 0.9711112976074219, 4.333343505859375, -1.4773406982421875, -0.30503273010253906, -1.010284423828125, -1.84112548828125, 2.038909912109375, 2.5642852783203125, 3.60028076171875, -4.801025390625, 1.71807861328125, -0.50244140625, 1.9787139892578125, -1.40240478515625, 0.12073135375976562, 3.6214599609375, 0.7890472412109375, 0.5084915161132812, 0.5725250244140625, 0.674530029296875, -0.09832763671875, -1.1703033447265625, 1.0364837646484375, -4.427490234375, -0.87493896484375, -0.8223419189453125, -0.8555374145507812, -6.73675537109375, 0.28253936767578125, 4.7496337890625, 0.309173583984375, 0.17655181884765625, 1.124542236328125, 4.68109130859375, -0.222900390625, 4.79351806640625, -0.8642539978027344, 2.099578857421875, -0.53131103515625, -7.36419677734375, 2.56634521484375, -2.2703857421875, -0.0811767578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000042.npy"}
{"epoch": 0.08795811518324607, "step": 43, "batch_size": 128, "mean": 1.2672030925750732, "std": 2.3100671768188477, "min": -8.5623779296875, "p10": -1.2617424011230467, "median": 0.9553375244140625, "p90": 4.6193603515625, "max": 5.651153564453125, "pos_frac": 0.7109375, "sample": [2.5634765625, 2.71502685546875, 2.854705810546875, -1.545013427734375, -2.72113037109375, -8.5623779296875, 4.9267578125, 1.25677490234375, -0.0283660888671875, 0.912322998046875, 3.017333984375, -5.1397705078125, -2.3583984375, 0.8593902587890625, -1.17645263671875, 0.6832275390625, 5.642333984375, 3.983001708984375, 5.651153564453125, 0.364501953125, 1.0251007080078125, 4.61090087890625, 0.735595703125, 0.7335128784179688, 0.5945510864257812, 4.8336181640625, 0.9718017578125, 1.1952362060546875, -0.351837158203125, -1.616180419921875, 0.627655029296875, 0.9425048828125, 2.751190185546875, 2.971588134765625, 4.63909912109375, 0.5024337768554688, 0.3177337646484375, 2.43878173828125, 0.9908714294433594, -0.948211669921875, 3.8359375, 1.3135223388671875, 2.48345947265625, 3.980010986328125, 0.3665904998779297, -1.8290786743164062, 2.01080322265625, 0.235595703125, 2.00799560546875, 3.55523681640625, 4.666259765625, 0.528472900390625, 3.70977783203125, 1.0214614868164062, 1.61285400390625, -1.0611801147460938, 1.81951904296875, -0.96575927734375, 2.204376220703125, -2.2771759033203125, 3.456787109375, 1.7139167785644531, 4.14935302734375, 2.20513916015625, 0.3529777526855469, -0.3005828857421875, -0.593658447265625, 2.4878387451171875, -1.2201614379882812, 1.491790771484375, -1.1907577514648438, 4.78125, -0.11138916015625, -0.631500244140625, -1.3609466552734375, 2.3475494384765625, 3.941650390625, 4.507598876953125, 0.18671417236328125, 5.4100341796875, 1.6759033203125, -0.069976806640625, 0.4528656005859375, -0.185882568359375, 4.884368896484375, 5.636932373046875, 4.01226806640625, -0.19036865234375, 0.157012939453125, -0.54803466796875, 2.499053955078125, 2.29656982421875, -0.72796630859375, -1.36114501953125, 0.5531597137451172, 3.3814697265625, -0.1860198974609375, 4.959075927734375, 0.8750762939453125, 1.963714599609375, 0.92706298828125, 0.26683807373046875, -0.268310546875, -1.3587646484375, -0.06522750854492188, 5.12396240234375, 2.87060546875, 1.84796142578125, 0.7434539794921875, 1.0256195068359375, 0.968170166015625, -1.83746337890625, -0.0958404541015625, -3.508758544921875, 0.6141815185546875, 1.05859375, 3.22503662109375, 2.3597412109375, 4.93505859375, 4.365875244140625, 0.78924560546875, 1.7007217407226562, 4.08758544921875, -0.6782417297363281, 0.682037353515625, -0.15025711059570312, 0.30645751953125, -0.5181598663330078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000043.npy"}
{"epoch": 0.09005235602094241, "step": 44, "batch_size": 128, "mean": 1.6258784532546997, "std": 3.0492610931396484, "min": -9.403076171875, "p10": -1.2668840408325195, "median": 1.3397178649902344, "p90": 5.2942260742187495, "max": 10.28118896484375, "pos_frac": 0.7421875, "sample": [3.262176513671875, 1.803863525390625, 9.64990234375, 1.431640625, -0.4608154296875, -0.98974609375, 0.9702415466308594, 3.4677734375, 2.716339111328125, 0.5526790618896484, -1.77166748046875, 0.8419189453125, 2.690185546875, 0.1179351806640625, 0.8491630554199219, 0.97198486328125, 5.496551513671875, 0.51165771484375, 2.2364501953125, -9.403076171875, -0.691162109375, -0.2037353515625, -1.344451904296875, -2.31610107421875, 3.3251953125, 0.4068603515625, 4.436279296875, 0.7525043487548828, -0.11048126220703125, 5.764892578125, 1.4330673217773438, 4.22344970703125, 1.4394760131835938, 1.0166015625, 0.5936470031738281, 0.8177261352539062, 1.0269775390625, -1.2336406707763672, 1.037109375, 2.468505859375, 2.39105224609375, -0.51025390625, 0.041473388671875, 0.6597042083740234, -0.139862060546875, 3.99761962890625, -0.24346923828125, 7.2572021484375, 6.571502685546875, 2.75567626953125, -6.56243896484375, 3.1039505004882812, 3.2147216796875, -0.33926963806152344, -1.542633056640625, 3.362884521484375, 4.76708984375, -0.9733810424804688, 0.544952392578125, 4.8519287109375, 2.8429107666015625, 1.0291595458984375, -3.48187255859375, 1.27886962890625, 1.7862548828125, -0.8515357971191406, 4.568878173828125, 0.930908203125, 10.28118896484375, -0.07270240783691406, 0.8520355224609375, 4.551025390625, 0.9570484161376953, 1.8072357177734375, 1.982666015625, 0.4593238830566406, -4.9202880859375, 2.4661865234375, 0.25563812255859375, 5.0501708984375, -0.84564208984375, 7.504852294921875, 0.7015380859375, 5.0993804931640625, 0.8620204925537109, 1.827880859375, 1.826690673828125, 2.52642822265625, 6.3433837890625, 1.2689132690429688, 3.5887451171875, 3.89208984375, -5.538330078125, 1.4635200500488281, 0.9339447021484375, 1.505218505859375, 1.02191162109375, -0.44281578063964844, 5.69964599609375, 1.3346786499023438, 4.068206787109375, 1.344757080078125, 5.117584228515625, 3.0364990234375, -6.753204345703125, -0.8158111572265625, 5.2645263671875, 2.6139984130859375, 4.2106475830078125, 2.97296142578125, 2.8316574096679688, 8.4085693359375, 4.696868896484375, -4.166473388671875, -0.9660491943359375, -3.19873046875, -0.0413970947265625, -0.231658935546875, 6.01605224609375, 0.72509765625, 2.468597412109375, 1.67156982421875, -2.1666412353515625, 5.363525390625, 5.65087890625, -0.254608154296875, 2.9310684204101562, 3.970458984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000044.npy"}
{"epoch": 0.09214659685863874, "step": 45, "batch_size": 128, "mean": 1.233678936958313, "std": 2.931100845336914, "min": -6.29498291015625, "p10": -2.074171447753906, "median": 0.7160253524780273, "p90": 4.74627685546875, "max": 13.0797119140625, "pos_frac": 0.671875, "sample": [2.20489501953125, -1.3230438232421875, 4.88653564453125, -1.8595962524414062, 1.14678955078125, 3.6365966796875, -0.3655509948730469, 3.536834716796875, -0.22878646850585938, 1.444091796875, 0.0, -3.1536865234375, 3.709228515625, 2.1031341552734375, -1.7712478637695312, 1.4026565551757812, -1.93853759765625, 2.148468017578125, -1.131500244140625, 1.8628196716308594, 3.0606689453125, 4.8023681640625, 0.64251708984375, -1.397705078125, 1.6382217407226562, 6.88690185546875, 0.36270904541015625, 7.8687744140625, 2.031951904296875, 3.46240234375, 1.2670707702636719, -2.12933349609375, 0.6611175537109375, 0.385894775390625, 0.0436248779296875, 0.31951904296875, 2.65582275390625, -0.11285400390625, 5.40301513671875, 4.49908447265625, -0.829010009765625, 1.8462982177734375, 0.500244140625, 0.8883018493652344, -2.24261474609375, -2.1510772705078125, 0.7709331512451172, 2.22210693359375, 3.6203155517578125, 1.3248786926269531, -0.161468505859375, 1.912628173828125, -2.06060791015625, -1.2156829833984375, 2.6750946044921875, -0.2931976318359375, 0.6287422180175781, 0.288055419921875, -4.744598388671875, 0.0738372802734375, 2.76824951171875, -3.57098388671875, 8.182891845703125, 1.380859375, 1.6418190002441406, -0.2313232421875, 4.435394287109375, 4.730712890625, 4.7825927734375, 3.8787841796875, 2.1306419372558594, -2.1058197021484375, -0.85894775390625, 5.741790771484375, 0.381378173828125, 1.03448486328125, 0.6389389038085938, 1.4405975341796875, -0.6764373779296875, -6.29498291015625, -1.265716552734375, 1.2105903625488281, 9.397796630859375, 2.1552886962890625, -0.4662628173828125, -0.85357666015625, -0.373443603515625, 0.2228546142578125, 1.4669418334960938, -3.41632080078125, 4.09661865234375, 4.492034912109375, 0.459503173828125, 3.87493896484375, 0.23321533203125, 0.6436767578125, 0.58343505859375, 0.527069091796875, -2.348663330078125, -0.5416107177734375, 13.0797119140625, 1.0648117065429688, 4.24383544921875, -2.0129013061523438, 0.9595794677734375, 1.9614181518554688, 0.512237548828125, 0.7723655700683594, -5.24249267578125, 3.319427490234375, -0.7046470642089844, 3.5155792236328125, 4.791656494140625, -0.6160888671875, 7.994903564453125, -0.037322998046875, 0.0032501220703125, 0.4888572692871094, 0.19631195068359375, -0.211944580078125, 3.67413330078125, -2.81658935546875, 4.579345703125, 1.064422607421875, 8.0211181640625, -0.505889892578125, -2.59991455078125, 1.1716690063476562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000045.npy"}
{"epoch": 0.09424083769633508, "step": 46, "batch_size": 128, "mean": 1.4582431316375732, "std": 2.692007064819336, "min": -6.13531494140625, "p10": -1.0209312438964844, "median": 0.9945335388183594, "p90": 5.038787841796875, "max": 11.9324951171875, "pos_frac": 0.7265625, "sample": [4.1917724609375, 5.0966796875, 1.2983551025390625, -0.39153289794921875, 4.953643798828125, 6.40673828125, 7.3802490234375, 3.633544921875, 0.13818359375, 0.1757526397705078, 1.473236083984375, 0.171905517578125, -0.771209716796875, 1.41387939453125, 4.1661376953125, 4.042236328125, 0.2120361328125, 0.04483795166015625, 0.19540786743164062, -3.891326904296875, 0.425872802734375, -0.20086669921875, -2.973663330078125, 3.606170654296875, 1.3475189208984375, 2.50836181640625, 5.6986083984375, 1.18798828125, -0.12381744384765625, -0.3480720520019531, 0.1297740936279297, 0.8179931640625, 1.17913818359375, -0.295562744140625, 5.19195556640625, 3.82611083984375, 4.37042236328125, 0.50262451171875, -0.7574310302734375, 2.71246337890625, 0.308929443359375, 1.2064666748046875, 1.46661376953125, -1.26324462890625, 2.3083953857421875, 1.134033203125, 1.0182342529296875, 4.4104156494140625, -0.559722900390625, 4.14306640625, -4.4920654296875, 0.082672119140625, -0.4869976043701172, 2.440521240234375, -2.3333892822265625, 0.240814208984375, 1.10528564453125, 1.2027587890625, -6.13531494140625, -0.595428466796875, 5.687347412109375, 0.9708328247070312, -0.08544921875, 3.1927490234375, 0.3616485595703125, -0.8783187866210938, -1.39141845703125, -0.3682403564453125, 4.3170166015625, 3.31292724609375, 0.590850830078125, 5.7757568359375, 3.9701690673828125, 1.1795501708984375, -3.52581787109375, 0.84423828125, 0.73626708984375, -0.205780029296875, 1.8570938110351562, 0.6959228515625, 5.01397705078125, -1.011260986328125, 0.1503143310546875, 2.8210906982421875, 11.9324951171875, 3.3305130004882812, -0.15606689453125, 0.0111846923828125, 1.2468414306640625, 5.9937744140625, -0.2638397216796875, 5.1741943359375, 0.0, 1.8779296875, 2.6202239990234375, -1.0434951782226562, -2.07135009765625, 1.34814453125, 3.31280517578125, 0.303924560546875, 1.99664306640625, 0.685150146484375, 2.6904296875, -0.3919258117675781, 8.33245849609375, -0.8464851379394531, -0.133026123046875, 0.964324951171875, 0.34127044677734375, 1.1521930694580078, 0.3578376770019531, -1.5782928466796875, -1.8068351745605469, -0.6323928833007812, 1.781839370727539, 2.89312744140625, 6.019561767578125, 3.25506591796875, 2.31878662109375, -5.374755859375, 4.95135498046875, 0.7588958740234375, 0.876312255859375, 4.7308349609375, 0.9290771484375, 6.27532958984375, 1.1629638671875, 3.36846923828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000046.npy"}
{"epoch": 0.09633507853403141, "step": 47, "batch_size": 128, "mean": 1.058279037475586, "std": 3.5187549591064453, "min": -11.1065673828125, "p10": -2.905441284179687, "median": 0.9729080200195312, "p90": 5.257167053222656, "max": 11.966278076171875, "pos_frac": 0.6328125, "sample": [-1.9112548828125, 0.9452781677246094, 2.0282440185546875, -0.12786865234375, 3.6625213623046875, -3.5149459838867188, -0.120635986328125, 3.0446929931640625, 0.7679443359375, 0.435577392578125, 9.17193603515625, -5.2608642578125, 0.95880126953125, 0.42823028564453125, -1.2628326416015625, -0.8848991394042969, 2.4220428466796875, 0.5836410522460938, -1.71490478515625, 0.706573486328125, 2.468994140625, 1.8033447265625, 5.2867431640625, 1.511077880859375, 3.303924560546875, -1.5355987548828125, -0.4114837646484375, -2.736846923828125, 0.482208251953125, 1.0470218658447266, -0.1107177734375, 2.79083251953125, 6.94415283203125, -6.59771728515625, -5.501800537109375, 2.5428466796875, 3.16668701171875, 3.34893798828125, 0.863861083984375, 1.7262496948242188, 3.071075439453125, -5.6175079345703125, 0.797271728515625, -1.356353759765625, 6.2322998046875, 1.5752410888671875, 1.797027587890625, 5.416534423828125, 0.04341316223144531, 6.4078826904296875, 1.4249725341796875, 0.07841300964355469, -1.000732421875, 3.319976806640625, 0.3317413330078125, 0.360931396484375, 2.59173583984375, -1.585845947265625, 0.1736278533935547, -2.50018310546875, -2.56683349609375, 2.913381576538086, -0.9172286987304688, 2.69000244140625, 4.530853271484375, -1.922393798828125, 3.25946044921875, 1.7162399291992188, 0.01409912109375, -0.08286666870117188, 11.966278076171875, 5.70849609375, 3.488922119140625, 0.691497802734375, -0.7687606811523438, -0.2859344482421875, -7.77447509765625, 6.05584716796875, 4.02252197265625, 2.4340972900390625, -0.47607421875, -0.2279052734375, -0.161163330078125, 4.118865966796875, 4.072662353515625, -3.298828125, -0.6534576416015625, 1.3647613525390625, 1.1160736083984375, 5.0775909423828125, 4.050975799560547, 2.600830078125, -0.308380126953125, 2.72607421875, 1.75103759765625, 3.89910888671875, -1.1230926513671875, -3.6784439086914062, -6.63372802734375, -1.2460784912109375, 3.60565185546875, 1.35693359375, 2.310302734375, 6.09619140625, -6.736480712890625, -6.0533294677734375, 5.2444915771484375, 0.0, -1.7989044189453125, 4.743072509765625, -1.710601806640625, 4.5977783203125, 0.0, 6.5320587158203125, 7.546173095703125, 9.00665283203125, 0.9870147705078125, -0.5432357788085938, 3.196441650390625, -0.04196929931640625, -0.4081268310546875, 4.774322509765625, 1.519561767578125, 1.454803466796875, 1.349334716796875, -11.1065673828125, -4.51483154296875, 3.597412109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000047.npy"}
{"epoch": 0.09842931937172775, "step": 48, "batch_size": 128, "mean": 1.5927886962890625, "std": 3.3689463138580322, "min": -5.461212158203125, "p10": -1.2698822021484373, "median": 0.9703521728515625, "p90": 5.422735595703124, "max": 19.71087646484375, "pos_frac": 0.6796875, "sample": [2.7939605712890625, 0.588836669921875, 1.9890365600585938, 6.080528259277344, 10.49285888671875, 1.842498779296875, 5.3076171875, 2.550018310546875, -1.0196533203125, -3.827545166015625, -0.0667724609375, -0.14382553100585938, -1.19110107421875, -0.361114501953125, 8.0750732421875, 1.9505805969238281, 0.78558349609375, -5.025665283203125, 1.877105712890625, -0.57672119140625, 2.23016357421875, 1.2928314208984375, 1.317718505859375, 1.4253921508789062, 1.0985565185546875, 3.920867919921875, -0.2712249755859375, 9.406982421875, -0.918426513671875, 1.87701416015625, 0.7087669372558594, -2.18841552734375, 2.598175048828125, 1.803131103515625, 2.8768310546875, 8.713348388671875, 0.20659446716308594, 2.47442626953125, 1.3421478271484375, 5.21173095703125, 1.6041259765625, 0.7038497924804688, 4.407135009765625, 2.026153564453125, -0.1891937255859375, 1.0833206176757812, 0.867919921875, 4.8380126953125, 4.027976989746094, 2.5250244140625, -4.3414459228515625, 1.04034423828125, -1.2177886962890625, 3.7581787109375, 0.0, 6.77978515625, -1.35693359375, 5.28070068359375, 0.864959716796875, -0.20459747314453125, -3.0986785888671875, -2.390096664428711, 0.267669677734375, 1.9250373840332031, 3.639312744140625, -0.644683837890625, -1.788238525390625, -0.58807373046875, 0.5453872680664062, 3.20947265625, 0.262176513671875, -0.2104644775390625, 1.8160324096679688, 0.17002105712890625, -0.35970306396484375, 0.4833831787109375, 1.2553634643554688, -0.211517333984375, 0.22365379333496094, 1.327484130859375, 0.0, 0.4182891845703125, 2.1744384765625, 1.1516532897949219, 0.56817626953125, 4.1534271240234375, 19.71087646484375, 0.821075439453125, -0.18592071533203125, 2.107666015625, -1.1740875244140625, -0.1500091552734375, 2.21282958984375, 0.45172119140625, -2.24017333984375, 0.809356689453125, 1.1011543273925781, -5.461212158203125, -2.5170936584472656, 11.55902099609375, 2.1539535522460938, 0.06604766845703125, -4.122406005859375, 1.4180450439453125, -0.28399658203125, -0.36102294921875, 1.9654998779296875, 7.666839599609375, -1.232574462890625, 3.5550537109375, 0.4412384033203125, 0.0825653076171875, 5.69134521484375, -0.003021240234375, 0.7114334106445312, 2.314910888671875, -2.020050048828125, 2.9920425415039062, 0.900360107421875, -1.1162605285644531, -0.1941375732421875, 1.2525711059570312, 4.9091796875, 7.4940185546875, -0.538055419921875, 3.700836181640625, 8.2196044921875, 7.122772216796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000048.npy"}
{"epoch": 0.10052356020942409, "step": 49, "batch_size": 128, "mean": 1.6596479415893555, "std": 3.532806634902954, "min": -9.10638427734375, "p10": -2.506268310546875, "median": 0.9037542343139648, "p90": 6.045419311523437, "max": 10.1063232421875, "pos_frac": 0.6796875, "sample": [-0.426605224609375, 4.6767578125, 2.004364013671875, 0.8745574951171875, 5.1613922119140625, 2.4788436889648438, 0.8583984375, 1.3985118865966797, 0.430084228515625, 2.0208053588867188, -1.0500335693359375, 5.473846435546875, -0.58636474609375, -0.454437255859375, 0.054168701171875, -6.5890655517578125, -0.2500114440917969, 1.8169403076171875, 0.24432373046875, -2.9401702880859375, -0.36639404296875, -0.8568115234375, 0.7111072540283203, 2.1744384765625, 0.8019790649414062, 6.475250244140625, 7.11651611328125, 0.24816131591796875, 2.6953163146972656, 1.1305160522460938, 3.5574798583984375, 2.6310501098632812, -2.7631378173828125, 3.757904052734375, 5.271728515625, 5.991668701171875, 0.354827880859375, 1.97760009765625, 0.3023223876953125, 0.105377197265625, -1.775970458984375, -0.3702545166015625, 5.38238525390625, 4.774444580078125, 0.4691658020019531, 5.5495452880859375, -2.498302459716797, 2.87237548828125, 5.856536865234375, 6.87835693359375, 2.47882080078125, 4.488311767578125, -2.50262451171875, -0.008087158203125, -4.0003662109375, -2.5147705078125, 0.3819122314453125, 3.5162353515625, -2.76690673828125, 5.059051513671875, -0.21082687377929688, 7.28033447265625, 2.2020492553710938, 0.8355712890625, 5.323455810546875, 0.9329509735107422, -0.48297119140625, 1.338134765625, 1.1215972900390625, 5.79248046875, -2.2333221435546875, 0.5553436279296875, 7.572113037109375, 0.740997314453125, -2.63311767578125, 10.1063232421875, -0.8665695190429688, 0.4578704833984375, 0.16072845458984375, -0.4105224609375, 0.080902099609375, -1.5906829833984375, 1.0612220764160156, 5.593841552734375, 8.909149169921875, -1.005859375, -4.155609130859375, 5.635009765625, 8.512176513671875, 4.20367431640625, 0.972625732421875, -0.8134880065917969, -9.10638427734375, 0.8576717376708984, -0.346588134765625, -3.33038330078125, -4.54901123046875, 2.7103271484375, -0.644195556640625, 8.7020263671875, 4.124183654785156, 2.7472076416015625, 3.071044921875, 5.7955474853515625, 0.5940093994140625, 6.636749267578125, 1.6273422241210938, 5.1282806396484375, -0.206939697265625, 6.17083740234375, 2.65625, 0.02069091796875, -9.054473876953125, 5.804351806640625, 7.1004791259765625, -0.182403564453125, 5.2309722900390625, 0.5593299865722656, 4.291694641113281, -1.008331298828125, 4.921821594238281, 9.52008056640625, 2.6862030029296875, -2.9710693359375, 3.113861083984375, -1.271026611328125, -1.2290802001953125, -0.504791259765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000049.npy"}
{"epoch": 0.10261780104712041, "step": 50, "batch_size": 128, "mean": 1.0693821907043457, "std": 3.9941670894622803, "min": -10.22613525390625, "p10": -3.5250183105468746, "median": 0.9345626831054688, "p90": 5.5641448974609355, "max": 12.2362060546875, "pos_frac": 0.6015625, "sample": [2.7393341064453125, -2.4038772583007812, 3.3338623046875, -0.3785285949707031, 4.132537841796875, -0.14544677734375, 2.50164794921875, 1.24957275390625, 2.96466064453125, -7.51385498046875, 5.13031005859375, 0.6617202758789062, -1.0174331665039062, -0.295135498046875, 9.216552734375, -0.10980224609375, -3.055908203125, -3.993621826171875, -2.919921875, -0.5344390869140625, -0.6284561157226562, -0.094390869140625, 1.10198974609375, 3.147857666015625, 1.0434722900390625, -0.968841552734375, -10.009033203125, -3.42791748046875, 12.2362060546875, 2.89532470703125, -1.58489990234375, -6.95904541015625, -1.1871337890625, 1.2527542114257812, -0.2593994140625, 0.00215911865234375, 0.230865478515625, 2.66522216796875, -3.191162109375, 1.073577880859375, -0.417724609375, -2.347625732421875, -5.767852783203125, 3.990234375, 2.9822921752929688, 1.0724563598632812, 1.3325042724609375, -1.3203582763671875, 2.462890625, -0.23825454711914062, -0.08428955078125, -1.2307567596435547, 3.34271240234375, 3.7314453125, 2.162750244140625, 5.112152099609375, -0.13261032104492188, 6.963531494140625, -3.7515869140625, -5.407318115234375, 4.830780029296875, 2.824249267578125, -2.6499481201171875, 0.5568790435791016, 0.9170379638671875, 10.90496826171875, -1.4378662109375, -0.06486129760742188, -1.4235038757324219, -5.0264892578125, 3.53985595703125, 5.33056640625, 4.03594970703125, 3.571746826171875, 3.0491256713867188, 0.0342254638671875, 6.235748291015625, -1.93780517578125, 2.81640625, 4.287353515625, -0.6661186218261719, 0.95208740234375, 0.42120361328125, -0.40570068359375, -2.111907958984375, 0.443878173828125, -0.2003326416015625, 4.701568603515625, 3.5162124633789062, -4.330169677734375, 4.756927490234375, -3.834228515625, 1.425323486328125, 1.2099189758300781, 7.138214111328125, 2.548980712890625, 2.611572265625, 4.269378662109375, 9.6671142578125, 2.1608047485351562, 0.4994659423828125, 4.3616943359375, 0.9844970703125, 6.6795654296875, 4.0318603515625, 1.156829833984375, -1.594573974609375, 11.63421630859375, 1.1812591552734375, 1.6487274169921875, -4.86572265625, 0.178924560546875, 6.109161376953125, -0.598480224609375, 7.828857421875, -0.24274826049804688, 0.6799697875976562, -10.22613525390625, -8.19769287109375, 3.1441268920898438, 10.341644287109375, 1.0605316162109375, 0.5949249267578125, -3.1712646484375, 1.114288330078125, 0.616546630859375, -1.0037841796875, 8.90911865234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000050.npy"}
{"epoch": 0.10471204188481675, "step": 51, "batch_size": 128, "mean": 1.9140123128890991, "std": 4.984821796417236, "min": -13.062347412109375, "p10": -2.8446899414062496, "median": 1.4759950637817383, "p90": 8.318946838378906, "max": 18.0484619140625, "pos_frac": 0.65625, "sample": [3.805023193359375, -0.15883636474609375, 4.199369430541992, 1.8279380798339844, 11.19268798828125, 6.0182037353515625, 1.8678321838378906, 5.866943359375, 8.29791259765625, 0.668792724609375, 2.287689208984375, 5.6046295166015625, -5.528564453125, -8.8065185546875, 8.123947143554688, -1.5932769775390625, -12.733612060546875, -3.112274169921875, 0.08433341979980469, -1.3577880859375, -2.283843994140625, 2.6566925048828125, 7.9114990234375, -1.039398193359375, 1.38946533203125, -0.8257369995117188, -0.4294319152832031, 3.066680908203125, 1.908416748046875, 3.0991744995117188, -1.8392181396484375, 0.500030517578125, 1.4673709869384766, -1.3645553588867188, 5.239349365234375, 7.810760498046875, 9.253173828125, 4.290901184082031, 3.04901123046875, -0.71990966796875, -11.30615234375, -1.5400390625, 2.03277587890625, -1.9060821533203125, -13.062347412109375, 2.78729248046875, -0.72784423828125, 6.4347381591796875, 3.8112335205078125, 8.2628173828125, 1.7074241638183594, 1.95941162109375, -2.3173828125, -2.509979248046875, -0.7924346923828125, 8.433685302734375, -9.869903564453125, 0.575347900390625, -1.30706787109375, 1.4280242919921875, -2.730010986328125, -4.938507080078125, 0.519256591796875, 2.194366455078125, -3.51837158203125, 0.07763671875, 6.121124267578125, 6.49761962890625, 3.3288726806640625, 5.2015380859375, 8.766357421875, 3.61090087890625, 5.4292755126953125, 18.0484619140625, -0.3007049560546875, 1.36419677734375, 1.3776321411132812, 2.405731201171875, -8.17303466796875, 5.222076416015625, -0.942718505859375, 5.10247802734375, -4.737548828125, 0.0, 12.4007568359375, 10.32000732421875, 7.4113006591796875, 10.42291259765625, -1.876617431640625, 13.696044921875, -0.371795654296875, -3.1963043212890625, 8.368026733398438, 0.516632080078125, 2.3364028930664062, -0.29236602783203125, 6.098480224609375, 4.72393798828125, -0.6399040222167969, -1.09600830078125, -0.31725502014160156, 6.561553955078125, 4.660064697265625, 1.1258010864257812, 9.85601806640625, 0.5266189575195312, -0.55975341796875, 9.382171630859375, -1.53326416015625, 1.484619140625, -1.339508056640625, 0.7989692687988281, 8.1456298828125, -4.67138671875, 0.8501968383789062, 8.68634033203125, 2.287750244140625, 1.920013427734375, 1.73333740234375, 0.6827201843261719, -0.1207122802734375, 0.4408111572265625, 0.6725921630859375, 2.39703369140625, 2.681182861328125, 5.67767333984375, 0.1522979736328125, 2.275543212890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000051.npy"}
{"epoch": 0.1068062827225131, "step": 52, "batch_size": 128, "mean": 3.072331428527832, "std": 5.047115325927734, "min": -8.96539306640625, "p10": -1.7145843505859373, "median": 2.2430343627929688, "p90": 10.611868286132811, "max": 19.73504638671875, "pos_frac": 0.7890625, "sample": [3.287384033203125, 1.97076416015625, -4.4852294921875, 0.7353515625, 1.0042133331298828, 2.3029022216796875, 2.7105712890625, 0.0867462158203125, 4.9834136962890625, 3.6686477661132812, 0.09490203857421875, 0.43935394287109375, 4.45794677734375, 10.02032470703125, 2.972808837890625, 3.89678955078125, 9.46624755859375, 2.517242431640625, -3.64422607421875, -1.6720123291015625, 0.2973785400390625, 11.823211669921875, 1.3397216796875, 0.6524200439453125, 1.963165283203125, 10.5267333984375, 0.8984832763671875, 2.1491165161132812, -0.47719573974609375, -1.5596923828125, 8.421142578125, 0.16451644897460938, 0.336883544921875, 2.370513916015625, 11.2037353515625, 1.8191299438476562, 0.47219085693359375, -4.32843017578125, 2.5663909912109375, 2.50439453125, 7.24468994140625, 1.4256362915039062, 2.121002197265625, 0.6257095336914062, 2.82904052734375, 2.731658935546875, -0.13232994079589844, 4.992218017578125, 0.60931396484375, -0.10973930358886719, 2.70166015625, 2.9295692443847656, -0.904052734375, 11.8946533203125, 3.5243988037109375, 2.18316650390625, 9.94281005859375, -5.513580322265625, 1.046356201171875, 6.20672607421875, 0.283447265625, 2.018238067626953, 2.5245437622070312, 6.44281005859375, 1.0911102294921875, 10.81573486328125, 1.6891632080078125, 1.608184814453125, 13.6190185546875, 0.3133888244628906, -1.587493896484375, -0.234222412109375, 3.1499557495117188, -0.7053909301757812, 1.534027099609375, 6.907073974609375, -0.9431514739990234, 4.260101318359375, 1.073394775390625, 4.02630615234375, 9.518951416015625, 5.8815155029296875, -5.01263427734375, -5.294219970703125, 15.408905029296875, 6.558032989501953, -7.768524169921875, 3.284942626953125, 19.73504638671875, 3.57403564453125, 3.3039093017578125, 0.15863037109375, 3.9642333984375, 4.184234619140625, -6.886444091796875, -1.4423675537109375, -1.8139190673828125, -7.614410400390625, 2.817962646484375, 12.08502197265625, 9.736557006835938, 1.771514892578125, 6.9627685546875, 5.7188720703125, 6.7018280029296875, -2.3689117431640625, -8.96539306640625, -0.40227508544921875, 0.9953384399414062, 1.0943450927734375, 7.747894287109375, -3.748046875, 1.5302810668945312, 8.2088623046875, 5.715911865234375, 11.424530029296875, 15.88134765625, 10.810516357421875, -0.86328125, 11.03271484375, 5.082000732421875, 14.341598510742188, 4.572418212890625, 0.9304370880126953, 2.6240692138671875, 0.20052337646484375, 10.25244140625, -0.5664443969726562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000052.npy"}
{"epoch": 0.10890052356020942, "step": 53, "batch_size": 128, "mean": 2.269890785217285, "std": 5.501365661621094, "min": -13.7489013671875, "p10": -3.92432861328125, "median": 1.674591064453125, "p90": 10.34955596923828, "max": 16.07769775390625, "pos_frac": 0.671875, "sample": [5.9207763671875, -2.785186767578125, -6.650054931640625, -4.591041564941406, 0.821502685546875, 2.685455322265625, 2.934877395629883, -0.656890869140625, 5.2589111328125, -2.3856353759765625, 4.8154296875, 0.897247314453125, 7.95013427734375, -4.12957763671875, 5.523040771484375, 10.82513427734375, 4.07794189453125, -2.6868209838867188, 9.9725341796875, 0.051952362060546875, -1.1059799194335938, -7.09130859375, 1.8907241821289062, 0.5962371826171875, 2.739105224609375, 1.7865982055664062, 6.3511505126953125, 2.483001708984375, 5.6177215576171875, 5.1938934326171875, 2.3692474365234375, 1.353759765625, 3.45751953125, -0.65936279296875, 11.42431640625, -7.5823974609375, 7.432373046875, -13.7489013671875, 0.459228515625, 3.69195556640625, -1.779296875, -0.0627593994140625, -2.75006103515625, 9.9066162109375, 1.0339889526367188, -9.60833740234375, -0.08899307250976562, -1.3361282348632812, 3.9647769927978516, -1.2104167938232422, 1.4976081848144531, 5.82208251953125, -0.5797195434570312, 7.355796813964844, 1.7550201416015625, -6.50091552734375, -0.967132568359375, 8.50225830078125, -9.550384521484375, 10.655731201171875, 9.493927001953125, 0.0, 11.227813720703125, -0.4598388671875, 0.296600341796875, -2.209136962890625, -2.168426513671875, -1.35382080078125, 0.07916259765625, 0.6749343872070312, -1.0201034545898438, -2.633575439453125, 10.79443359375, 13.9783935546875, 4.521392822265625, 8.66619873046875, 7.920989990234375, 1.8868675231933594, -0.8973846435546875, 10.820327758789062, 14.71856689453125, 4.61944580078125, 11.015899658203125, 10.83203125, 0.22476959228515625, -1.0095748901367188, 0.17132568359375, 6.4115142822265625, 1.5941619873046875, -3.83636474609375, 2.9639511108398438, 1.16705322265625, 8.6629638671875, -2.7556610107421875, 1.861236572265625, 4.92340087890625, -5.4780426025390625, -8.95306396484375, 2.1092529296875, 6.6116943359375, 0.6622848510742188, 3.29522705078125, -4.13836669921875, -2.4786300659179688, 4.60626220703125, 7.88079833984375, 0.3166961669921875, 11.198394775390625, 10.028396606445312, 3.084156036376953, -2.8900222778320312, 1.12615966796875, -3.6284942626953125, 2.070465087890625, 6.27264404296875, 0.98504638671875, -0.01128387451171875, -9.215545654296875, 0.38620758056640625, 0.3342437744140625, 11.88226318359375, 1.0396728515625, 2.65179443359375, 16.07769775390625, 2.794677734375, 2.861968994140625, 7.09332275390625, 10.218338012695312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000053.npy"}
{"epoch": 0.11099476439790576, "step": 54, "batch_size": 128, "mean": 1.8347129821777344, "std": 5.736918926239014, "min": -12.639312744140625, "p10": -5.282868194580078, "median": 1.2187824249267578, "p90": 9.136611938476559, "max": 17.46728515625, "pos_frac": 0.625, "sample": [8.025421142578125, 5.427215576171875, 8.574996948242188, 5.839202880859375, -2.1784515380859375, 2.5606765747070312, -0.2301025390625, 7.913055419921875, 12.362823486328125, 4.668769836425781, -7.055572509765625, -2.2026519775390625, -0.8821945190429688, -7.1519775390625, 6.454559326171875, 10.93096923828125, 3.429840087890625, 2.935333251953125, -2.67059326171875, 3.6075439453125, -6.30072021484375, 17.067138671875, 17.46728515625, 0.950958251953125, -5.316337585449219, -4.725776672363281, 1.1239395141601562, -2.9122772216796875, -0.5048828125, -2.067291259765625, 12.902435302734375, 0.1146240234375, 3.195159912109375, 1.3136253356933594, -4.19073486328125, 11.07049560546875, 13.53485107421875, -9.37347412109375, 7.357421875, 2.7225189208984375, 2.3970947265625, 7.841644287109375, 6.05999755859375, 0.48712158203125, 0.587646484375, 1.319244384765625, -0.3847007751464844, 2.480987548828125, 8.169845581054688, 2.5260009765625, -0.9012336730957031, 2.0758934020996094, 8.79791259765625, 5.705413818359375, 7.8191375732421875, 0.5091552734375, 8.62713623046875, 3.85675048828125, -3.6126708984375, 3.6820526123046875, -2.9675445556640625, -1.1558837890625, -0.787017822265625, 1.5732879638671875, 0.06731414794921875, -4.5130615234375, 1.8252105712890625, -5.3764801025390625, 4.816215515136719, 2.940288543701172, 14.25494384765625, -2.345672607421875, -4.2159423828125, 2.6851577758789062, 7.8184814453125, 0.8196563720703125, -1.0648193359375, 0.105072021484375, 5.14678955078125, -0.8233680725097656, 5.229766845703125, 3.521331787109375, -2.81011962890625, 3.6306915283203125, 9.926910400390625, -3.829875946044922, -7.793182373046875, -1.26361083984375, 3.3158798217773438, 5.0664520263671875, -1.006622314453125, 10.434326171875, -2.92230224609375, -1.39862060546875, 3.2400989532470703, -10.27532958984375, 0.5129241943359375, -2.0218963623046875, 10.91485595703125, -0.24899673461914062, 14.77020263671875, 1.1220245361328125, -7.552032470703125, -3.960235595703125, 3.55035400390625, -0.5353660583496094, 0.6804885864257812, 7.85406494140625, 0.5206222534179688, 3.204437255859375, -4.635711669921875, -5.791046142578125, 4.58099365234375, -2.66790771484375, 4.174293518066406, 0.1149139404296875, -12.639312744140625, 12.0582275390625, -0.3916015625, -5.268524169921875, -6.66204833984375, 5.364776611328125, 3.95428466796875, 0.089569091796875, -8.200775146484375, 0.22802734375, 3.914562225341797, 4.1084136962890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000054.npy"}
{"epoch": 0.1130890052356021, "step": 55, "batch_size": 128, "mean": 2.3376388549804688, "std": 4.90138578414917, "min": -14.35772705078125, "p10": -3.0422630310058594, "median": 1.9186248779296875, "p90": 10.195187377929686, "max": 19.379852294921875, "pos_frac": 0.6484375, "sample": [-3.721405029296875, -0.5369110107421875, 0.359588623046875, -1.930816650390625, -1.663116455078125, 2.445587158203125, 2.6882476806640625, 3.138763427734375, 2.15093994140625, -3.408721923828125, 2.3549156188964844, 2.905620574951172, 2.53466796875, 11.34246826171875, 1.565704345703125, -2.62890625, 13.593963623046875, 1.98095703125, -3.01446533203125, 2.3409423828125, 7.361724853515625, -1.7323036193847656, 1.4943466186523438, 5.95892333984375, 1.6051559448242188, 14.0960693359375, 3.325439453125, -0.35717010498046875, 7.623046875, -1.3762359619140625, -0.972412109375, 0.3122406005859375, 1.7105255126953125, -3.478790283203125, 5.443820953369141, 2.727874755859375, -0.39306640625, 4.040435791015625, -0.13519287109375, 12.055908203125, 4.2419891357421875, 2.0068206787109375, 3.551483154296875, 10.151763916015625, 1.812103271484375, 2.1391754150390625, -1.92572021484375, 2.509735107421875, 5.15625, -2.3059024810791016, -3.286529541015625, 2.458721160888672, -4.709197998046875, 5.556079864501953, 3.167308807373047, 10.2965087890625, -0.43975830078125, -3.1071243286132812, -1.7548103332519531, 10.5025634765625, 3.490447998046875, -3.2021484375, 6.133930206298828, -1.749847412109375, 5.904052734375, 0.2704010009765625, 4.0144500732421875, 4.185066223144531, -0.7030029296875, -1.7171173095703125, -0.287109375, -0.14404296875, 10.927383422851562, -0.0630035400390625, 0.544342041015625, 11.2254638671875, -7.009033203125, 9.93804931640625, -0.4896125793457031, 1.898468017578125, -3.587127685546875, -0.2787017822265625, 0.021453857421875, -0.009168624877929688, 0.80108642578125, -4.543060302734375, 19.379852294921875, 2.87774658203125, 7.98486328125, 2.1449737548828125, -0.65972900390625, 5.7154998779296875, -0.09952545166015625, 2.5727386474609375, -4.021575927734375, 10.527099609375, 5.4293975830078125, 4.484100341796875, 0.442657470703125, 1.6424179077148438, 2.93511962890625, 2.750457763671875, 1.93878173828125, 0.312225341796875, 1.953277587890625, -4.007080078125, -1.7777099609375, 0.34612274169921875, -1.450714111328125, 0.9383087158203125, 13.896148681640625, 1.2516708374023438, -0.384796142578125, 4.8737335205078125, 3.4431419372558594, 5.076229095458984, 5.029518127441406, -1.8170242309570312, -14.35772705078125, 1.3993186950683594, 4.859619140625, 4.080352783203125, 12.08209228515625, -1.1247749328613281, -2.226837158203125, 14.998794555664062, 7.2458648681640625, 5.131706237792969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000055.npy"}
{"epoch": 0.11518324607329843, "step": 56, "batch_size": 128, "mean": 3.142362117767334, "std": 6.515273094177246, "min": -14.93328857421875, "p10": -3.0473663330078122, "median": 2.3904876708984375, "p90": 11.712347412109375, "max": 26.291961669921875, "pos_frac": 0.6640625, "sample": [3.5625, -11.202072143554688, 6.36456298828125, 12.89117431640625, 2.8591842651367188, -3.115875244140625, 14.390472412109375, 2.9557037353515625, -1.8372459411621094, -2.2691879272460938, 11.822021484375, -4.5037994384765625, 0.0965423583984375, 0.756195068359375, -1.57366943359375, -2.39453125, -4.78082275390625, -2.6737060546875, 4.915863037109375, 4.0184326171875, 4.0283660888671875, -1.0683364868164062, -1.7853813171386719, -0.10381317138671875, -14.93328857421875, 2.85028076171875, 6.3575286865234375, -2.53912353515625, 4.504608154296875, -1.72149658203125, 0.7038002014160156, 5.053466796875, 3.3201141357421875, 0.7332305908203125, -0.5595703125, 4.0255126953125, -5.9763641357421875, 3.7465362548828125, 5.9678802490234375, 1.006256103515625, 2.480804443359375, 0.8934326171875, 8.923858642578125, -5.375885009765625, 8.166519165039062, -8.4775390625, 6.9885406494140625, -4.913665771484375, 2.442108154296875, 2.969928741455078, -1.4057540893554688, -1.722982406616211, 4.652992248535156, -6.48236083984375, 2.824859619140625, -2.2127227783203125, 4.556816101074219, 16.624755859375, 2.3388671875, 9.554962158203125, 0.6255874633789062, 1.9225292205810547, 2.5936126708984375, 0.406494140625, -1.0503082275390625, 8.803131103515625, 8.484756469726562, 8.072357177734375, 15.516677856445312, 1.57061767578125, 0.014068603515625, -0.11306381225585938, -1.146728515625, 26.291961669921875, 9.9061279296875, 9.113037109375, 0.3249702453613281, -1.7686634063720703, 5.155609130859375, 1.5629348754882812, -13.37005615234375, 1.1038055419921875, -0.32338905334472656, 5.294464111328125, -1.38201904296875, 13.940887451171875, -0.12456512451171875, 10.214385986328125, -3.84698486328125, 7.273765563964844, 0.47203826904296875, 15.24151611328125, -3.01800537109375, -0.16161346435546875, 5.721405029296875, 10.08984375, -0.4070281982421875, -2.0581893920898438, 0.5632247924804688, 1.1315193176269531, 1.1096076965332031, 22.81884765625, 5.379425048828125, -0.5985755920410156, 8.01361083984375, -1.548065185546875, 12.332550048828125, 6.1455230712890625, 10.322395324707031, 14.14764404296875, 9.461456298828125, 9.161590576171875, -4.957733154296875, 4.11761474609375, -0.7047271728515625, 0.8323211669921875, -3.0140457153320312, -0.1754913330078125, 7.5770416259765625, 3.03863525390625, 4.00421142578125, 7.8518218994140625, 1.042266845703125, 19.7021484375, 6.599090576171875, 11.66534423828125, 13.282684326171875, 5.2549591064453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000056.npy"}
{"epoch": 0.11727748691099477, "step": 57, "batch_size": 128, "mean": 4.260931491851807, "std": 5.93695592880249, "min": -8.118698120117188, "p10": -2.322650909423828, "median": 3.4666309356689453, "p90": 12.765509033203124, "max": 19.65966796875, "pos_frac": 0.765625, "sample": [-2.3090667724609375, 7.75421142578125, -6.73406982421875, 0.791717529296875, -1.6735687255859375, 1.8753509521484375, 4.99658203125, 19.65966796875, 4.63653564453125, 8.600494384765625, 3.716796875, 4.959320068359375, 3.07537841796875, 7.39459228515625, -1.3635101318359375, 2.80908203125, 4.71051025390625, 11.2274169921875, 16.02593994140625, 12.65789794921875, 8.973388671875, 1.5445632934570312, 3.77801513671875, -3.609710693359375, 1.71875, -0.5814132690429688, 17.297760009765625, 11.194976806640625, 8.994720458984375, 1.067535400390625, 7.388526916503906, 11.721405029296875, -2.3543472290039062, -5.4495849609375, 13.0166015625, 1.3887443542480469, 5.68670654296875, 3.03289794921875, -7.640968322753906, 7.921875, 5.425729751586914, 13.770904541015625, -0.5162525177001953, 1.6304092407226562, 5.1806640625, 15.127899169921875, 3.721588134765625, 3.246826171875, -8.118698120117188, 4.4390869140625, 16.9195556640625, 6.39227294921875, 4.02392578125, -0.29144287109375, 11.261962890625, 3.6179580688476562, -7.62701416015625, -2.396331787109375, -0.8584213256835938, 13.4410400390625, 9.032791137695312, -0.32212257385253906, 1.4615364074707031, 0.2529716491699219, 3.7802734375, 2.145721435546875, 1.756683349609375, 0.205352783203125, 10.593475341796875, -0.19461441040039062, 6.974945068359375, 18.652008056640625, 1.3325653076171875, 10.297088623046875, -7.789825439453125, 5.0113525390625, -0.286376953125, 1.6161727905273438, 4.30810546875, 9.8214111328125, -1.5631103515625, -1.6166152954101562, 15.14813232421875, 1.60906982421875, 1.4441566467285156, 17.672607421875, 0.5602645874023438, 13.6566162109375, -2.356048583984375, -3.7886962890625, 7.724906921386719, 9.00823974609375, 10.273040771484375, -3.769622802734375, 15.95245361328125, 2.74755859375, 12.261016845703125, 2.0081024169921875, 9.263908386230469, 2.4431991577148438, -1.724456787109375, 4.7930755615234375, -0.533233642578125, 1.548095703125, -0.9495735168457031, -1.92547607421875, 4.7025146484375, 6.3133392333984375, 2.32061767578125, 2.64288330078125, 0.8109645843505859, 5.589019775390625, 4.313751220703125, 8.455352783203125, 3.6785430908203125, 3.0628662109375, 8.733230590820312, 3.005939483642578, -1.12554931640625, 1.702117919921875, 0.34314727783203125, 3.3153038024902344, 3.7401123046875, -4.840812683105469, 7.213417053222656, 0.03006744384765625, 9.48541259765625, 7.076507568359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000057.npy"}
{"epoch": 0.1193717277486911, "step": 58, "batch_size": 128, "mean": 2.3468985557556152, "std": 5.927313804626465, "min": -22.036285400390625, "p10": -4.562904357910155, "median": 2.2385597229003906, "p90": 9.4465576171875, "max": 19.34124755859375, "pos_frac": 0.7109375, "sample": [6.456695556640625, 9.71368408203125, 3.937530517578125, 4.425445556640625, -1.855194091796875, -0.71649169921875, 7.135528564453125, -5.485588073730469, 1.8887710571289062, 8.55535888671875, 9.814445495605469, -2.21368408203125, 0.3222923278808594, -22.036285400390625, 0.19718170166015625, 14.70220947265625, 5.394805908203125, 4.43548583984375, -4.920989990234375, 9.479278564453125, 1.152252197265625, -7.4725341796875, 0.1571044921875, 5.2197723388671875, 5.282386779785156, -1.70751953125, 19.34124755859375, -6.552154541015625, -2.6226043701171875, 9.44000244140625, 1.7349395751953125, 2.4124298095703125, 2.6459121704101562, -1.5429000854492188, 2.2306900024414062, -15.02825927734375, -1.72088623046875, 6.997749328613281, 0.1427936553955078, 2.56787109375, 10.1925048828125, 11.64300537109375, 7.5306396484375, -7.507293701171875, -1.1711578369140625, -2.2585487365722656, 0.76373291015625, -0.9183807373046875, 1.8074951171875, 2.246429443359375, 3.341644287109375, -5.724330902099609, 1.0829620361328125, 0.4567375183105469, -2.42999267578125, -2.0966796875, 4.5731201171875, 4.8689117431640625, -0.19055557250976562, 0.5789794921875, -1.9254302978515625, 10.26397705078125, 8.497505187988281, 10.402587890625, 2.569854736328125, 6.793060302734375, 3.3039321899414062, 9.24346923828125, -11.6104736328125, -1.166900634765625, 0.1182708740234375, 1.8642578125, -6.9346923828125, 1.9803009033203125, 6.0048980712890625, 6.04058837890625, -3.0402069091796875, 5.516506195068359, 0.524688720703125, 7.62603759765625, -1.647216796875, 3.602752685546875, 5.56591796875, 6.243194580078125, 9.40069580078125, 3.080860137939453, 0.6098442077636719, 8.405303955078125, 3.7772140502929688, 9.46185302734375, 0.7777175903320312, 0.011688232421875, 6.67437744140625, 2.0610599517822266, -7.44610595703125, 9.138214111328125, 3.5742721557617188, 8.32501220703125, 0.970428466796875, -4.4094390869140625, 4.1358642578125, -7.285499572753906, 1.0157814025878906, 12.35504150390625, 10.004776000976562, -1.108154296875, 0.83135986328125, 6.0221405029296875, 6.149116516113281, -0.5558624267578125, 7.45159912109375, -2.8091278076171875, 17.324951171875, 2.668506622314453, -3.17169189453125, 3.663726806640625, 3.868377685546875, 1.9745025634765625, -7.7895965576171875, 2.51239013671875, 1.9171142578125, 2.8393402099609375, 8.77197265625, 3.829559326171875, -1.896881103515625, -3.33953857421875, 6.951995849609375, 1.123382568359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000058.npy"}
{"epoch": 0.12146596858638743, "step": 59, "batch_size": 128, "mean": 1.6374738216400146, "std": 7.568989276885986, "min": -22.522796630859375, "p10": -5.545242309570312, "median": 0.7488479614257812, "p90": 10.654188537597655, "max": 22.52203369140625, "pos_frac": 0.6015625, "sample": [1.0841903686523438, 10.967178344726562, 4.62286376953125, -21.608795166015625, -4.6313018798828125, 10.520050048828125, 0.0, 10.35321044921875, 2.8820648193359375, 0.79522705078125, -5.91815185546875, 4.0793609619140625, 5.755462646484375, 6.686279296875, 7.440040588378906, 1.160125732421875, -5.1172332763671875, 16.764434814453125, 20.111785888671875, -2.10015869140625, -2.3349609375, 0.6541748046875, -4.5895538330078125, 5.7581787109375, 6.1116943359375, -0.402740478515625, 3.839599609375, 1.87640380859375, 18.107666015625, 0.36334228515625, -0.5147705078125, -2.8326263427734375, 0.6263580322265625, 7.924591064453125, 2.43597412109375, -4.855865478515625, -0.19222450256347656, 5.9576263427734375, 0.07074737548828125, -7.92193603515625, -0.2117156982421875, 0.287200927734375, 0.22585296630859375, -0.2212677001953125, 14.4354248046875, 21.847442626953125, -2.2002410888671875, 3.5839309692382812, 7.1735076904296875, 4.74298095703125, -3.16748046875, 11.391021728515625, -0.510894775390625, 2.2067718505859375, 3.2320632934570312, 3.769287109375, 3.0509262084960938, 7.40087890625, 9.258270263671875, -0.9925689697265625, 11.2296142578125, 6.1915740966796875, 4.56719970703125, -2.224456787109375, 4.5905303955078125, 4.031463623046875, 1.0345001220703125, 4.4062957763671875, 0.7982978820800781, 15.973388671875, 7.62744140625, 17.632080078125, -4.652587890625, 0.38226318359375, 1.039642333984375, 11.4239501953125, -0.7901611328125, -4.1559600830078125, -3.715179443359375, 1.2766494750976562, -4.327301025390625, 22.52203369140625, -1.8413200378417969, -14.8623046875, -0.24528121948242188, -9.74462890625, -5.8938751220703125, -5.14373779296875, -6.128692626953125, -8.769882202148438, 7.43035888671875, 4.181396484375, -7.91064453125, 3.8181381225585938, 3.6151123046875, 0.6453857421875, -5.3958282470703125, 9.049789428710938, -10.961883544921875, 0.5919265747070312, 0.7024688720703125, -15.529937744140625, 3.64263916015625, 21.73626708984375, -12.022216796875, -1.69775390625, -3.87518310546875, 3.2857131958007812, -4.7666015625, 3.98248291015625, 6.63262939453125, -2.177459716796875, 1.4441986083984375, -22.522796630859375, -3.387939453125, 0.37677574157714844, -0.39682769775390625, 3.132568359375, -1.1639404296875, 6.059844970703125, 0.3268909454345703, -0.6848316192626953, -4.557487487792969, 9.808761596679688, 0.9950103759765625, 0.14672088623046875, -4.48919677734375, -1.9291534423828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000059.npy"}
{"epoch": 0.12356020942408377, "step": 60, "batch_size": 128, "mean": 3.356768846511841, "std": 7.401423454284668, "min": -21.481201171875, "p10": -4.62274169921875, "median": 2.612675666809082, "p90": 12.803266906738282, "max": 22.49114990234375, "pos_frac": 0.7265625, "sample": [-18.471145629882812, 10.8773193359375, 3.960956573486328, 0.660919189453125, 1.70416259765625, 8.510833740234375, 2.34246826171875, -5.2799835205078125, 1.7017135620117188, 0.724395751953125, 7.7056884765625, 18.33837890625, 9.20330810546875, 1.0626487731933594, -3.0039825439453125, 1.29046630859375, -21.481201171875, 12.032196044921875, -1.95233154296875, -4.4434967041015625, 0.4098968505859375, 14.586181640625, 0.02045440673828125, 5.1524658203125, 16.01678466796875, 3.7107696533203125, -1.6186065673828125, 4.0860595703125, 6.067779541015625, -0.1587677001953125, 0.5901947021484375, 0.12823486328125, 5.2711181640625, 7.62774658203125, 0.7268867492675781, 5.3742218017578125, 8.1746826171875, -11.020965576171875, 2.4710235595703125, -0.3730449676513672, 6.100273132324219, -6.670806884765625, 16.475921630859375, -5.589111328125, 3.424102783203125, 12.8721923828125, 0.1566638946533203, -4.80731201171875, 2.2174835205078125, -1.972076416015625, 1.24664306640625, 10.81927490234375, 3.51910400390625, 16.0838623046875, -1.5342254638671875, -5.6917266845703125, 22.448394775390625, 1.0214385986328125, 9.944137573242188, -16.76361083984375, 4.492401123046875, 2.329784393310547, 8.147918701171875, -4.54364013671875, 4.855316162109375, -13.2459716796875, 0.8474044799804688, 0.1256732940673828, -0.06418609619140625, 5.466579437255859, 3.729278564453125, 3.6773223876953125, -3.472715377807617, 3.0058975219726562, 1.782379150390625, 0.0, 7.9725189208984375, -7.6834716796875, 0.7083740234375, 6.07440185546875, 9.115013122558594, 13.11785888671875, 10.920684814453125, 5.23834228515625, 17.017303466796875, 10.089202880859375, 1.4415740966796875, 7.376434326171875, 4.759654998779297, 7.65533447265625, 4.37554931640625, 2.7543277740478516, 6.193382263183594, 11.706069946289062, -0.3426322937011719, 16.125701904296875, 0.3884105682373047, 6.017738342285156, -0.2579193115234375, 6.9110107421875, -0.7343292236328125, -5.533843994140625, -4.387578964233398, -0.81964111328125, -3.129270553588867, 16.56317138671875, -4.0785980224609375, 11.1265869140625, 2.1391220092773438, 9.373443603515625, 13.442222595214844, 0.14190101623535156, 1.0560836791992188, 4.2828216552734375, 1.7284317016601562, 6.450592041015625, 11.26263427734375, 0.0, 22.49114990234375, -9.79473876953125, -3.5843505859375, -0.00646209716796875, 9.79302978515625, 12.773727416992188, 4.17486572265625, 1.2706146240234375, 3.32855224609375, 9.50091552734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000060.npy"}
{"epoch": 0.1256544502617801, "step": 61, "batch_size": 128, "mean": 3.169617176055908, "std": 8.12392520904541, "min": -21.66473388671875, "p10": -6.8314880371093745, "median": 2.6728477478027344, "p90": 14.27858734130859, "max": 22.01702880859375, "pos_frac": 0.65625, "sample": [-7.202362060546875, -11.281463623046875, -0.15304946899414062, -3.337982177734375, 0.18634033203125, 0.12500381469726562, 18.594329833984375, 3.375885009765625, -21.66473388671875, -1.05059814453125, -8.0513916015625, 1.743377685546875, 1.149383544921875, 3.48529052734375, -1.702606201171875, 15.223709106445312, 6.089141845703125, 13.70257568359375, 13.08038330078125, 20.49072265625, 1.8876914978027344, 18.830902099609375, -3.2724151611328125, 13.87353515625, 12.200347900390625, 0.832855224609375, 3.251556396484375, -1.169189453125, -1.73223876953125, 2.4100341796875, -14.254119873046875, 6.377685546875, 1.53033447265625, 6.0130615234375, -1.2666473388671875, 3.9927215576171875, -0.8141365051269531, 15.358444213867188, 6.779541015625, 5.366443634033203, 2.2766571044921875, 3.3097686767578125, 13.535186767578125, 11.677825927734375, 16.0904541015625, 20.228759765625, 20.105911254882812, 13.82098388671875, 11.3037109375, 7.58184814453125, 1.0145263671875, 3.6402130126953125, 15.2906494140625, 22.01702880859375, 8.244903564453125, 0.0, 0.2908172607421875, 6.34765625, 0.9365386962890625, -3.2994232177734375, -18.08795166015625, -1.9959182739257812, -2.444580078125, 8.004730224609375, 1.8822078704833984, 5.9878387451171875, 9.541732788085938, -14.75579833984375, -2.4262542724609375, -0.988128662109375, 11.70635986328125, -4.5914459228515625, 11.85113525390625, -2.642730712890625, 9.358978271484375, -6.98687744140625, 7.3501129150390625, 5.597293853759766, 16.34503173828125, 1.19366455078125, 7.725067138671875, -7.4132080078125, -5.357421875, 3.95697021484375, 0.445770263671875, 4.552570343017578, 3.525604248046875, 3.249114990234375, -2.5258140563964844, 4.22314453125, -1.6990470886230469, -8.635543823242188, -2.00921630859375, -3.6456527709960938, -1.7188720703125, 5.275421142578125, -3.58489990234375, 0.944061279296875, -2.59857177734375, 3.265687942504883, -1.38323974609375, 1.4408416748046875, 12.302642822265625, 4.790924072265625, 18.062713623046875, 16.721527099609375, 1.4229679107666016, 0.38266754150390625, 7.794342041015625, 4.770145416259766, 6.56072998046875, 0.36688232421875, -0.016324996948242188, -4.580169677734375, 4.7313690185546875, -6.764892578125, 2.9356613159179688, -11.367706298828125, 9.541839599609375, -2.34100341796875, -8.67431640625, 6.969757080078125, -0.12324905395507812, 3.7459793090820312, 4.191413879394531, 6.445281982421875, 12.0001220703125, -9.50286865234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000061.npy"}
{"epoch": 0.12774869109947645, "step": 62, "batch_size": 128, "mean": 3.3609886169433594, "std": 7.632937908172607, "min": -15.710205078125, "p10": -4.990907287597656, "median": 2.328329086303711, "p90": 15.805876159667967, "max": 31.02679443359375, "pos_frac": 0.6640625, "sample": [16.71600341796875, 4.14703369140625, 16.349761962890625, 8.934616088867188, 7.9554595947265625, 6.763519287109375, 1.384695053100586, 0.4364185333251953, 1.0511016845703125, 6.52410888671875, 0.0712127685546875, -5.343780517578125, 6.568817138671875, 17.89752197265625, -15.710205078125, 16.2928466796875, 2.9664459228515625, 0.1915435791015625, -1.7723922729492188, 2.62567138671875, -8.136444091796875, 31.02679443359375, -7.551239013671875, -5.9573974609375, 3.71759033203125, -4.2851409912109375, 4.2435760498046875, 3.562744140625, 4.752685546875, -0.92889404296875, 0.4669189453125, 1.1570854187011719, 6.9681396484375, 3.347900390625, 8.52337646484375, 5.43182373046875, 2.016204833984375, -4.369171142578125, 5.7910919189453125, -1.9574432373046875, 4.679313659667969, 3.815826416015625, 7.36474609375, 1.6286468505859375, -5.85028076171875, 13.997344970703125, 4.46466064453125, -6.277099609375, 1.161346435546875, 8.391448974609375, 18.498504638671875, 3.5902557373046875, 8.899078369140625, 1.127532958984375, -3.7838516235351562, 3.52923583984375, -1.631591796875, 13.25830078125, 18.27191162109375, 0.0250244140625, -6.976776123046875, -0.022613525390625, 5.496650695800781, 1.1012401580810547, -10.50311279296875, -2.4450225830078125, -2.0555572509765625, -3.3074607849121094, 3.1143646240234375, 10.76800537109375, 7.96270751953125, -1.1008148193359375, -5.56884765625, 20.84375, -7.2514190673828125, 0.348052978515625, 15.70172119140625, 0.0, 7.042449951171875, -3.8566055297851562, 1.9379959106445312, -4.8396759033203125, 7.097633361816406, 17.5810546875, 16.048904418945312, 6.5153350830078125, 1.8488616943359375, 7.4411163330078125, -0.475311279296875, 7.4813232421875, 0.5969295501708984, -2.927215576171875, 3.5157623291015625, 4.087127685546875, 7.3718414306640625, 5.462127685546875, -4.500328063964844, 9.722900390625, -0.11419677734375, 0.9659423828125, -4.8139190673828125, -1.53314208984375, -0.18105316162109375, -1.3736572265625, 3.3107337951660156, 2.427734375, 1.869384765625, -0.66961669921875, -14.478302001953125, -1.2751083374023438, 7.8504486083984375, -0.1315765380859375, 6.04705810546875, -3.662811279296875, 1.484100341796875, 4.5812225341796875, 2.228923797607422, 16.362579345703125, 3.467193603515625, -3.55828857421875, 27.30078125, 18.2308349609375, 9.100067138671875, 10.9578857421875, 4.53369140625, -0.149810791015625, -7.4356689453125, -1.4229545593261719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000062.npy"}
{"epoch": 0.12984293193717278, "step": 63, "batch_size": 128, "mean": 3.4834370613098145, "std": 7.400008678436279, "min": -15.35308837890625, "p10": -3.941748046875, "median": 2.5660476684570312, "p90": 13.373468017578123, "max": 21.53887939453125, "pos_frac": 0.6640625, "sample": [-0.641204833984375, 3.7408447265625, 0.3890876770019531, -13.62701416015625, -3.41033935546875, 1.6394615173339844, 2.265134811401367, 20.7308349609375, 8.50537109375, 11.478515625, 16.83770751953125, 16.499481201171875, -12.6123046875, 5.9333038330078125, -0.6870365142822266, 14.0208740234375, -0.887176513671875, -0.462982177734375, 4.42431640625, 11.4420166015625, -1.5269393920898438, 15.13275146484375, 11.123176574707031, 13.09600830078125, -15.35308837890625, 16.008316040039062, 6.538604736328125, 2.4596710205078125, -0.972991943359375, 4.8908538818359375, 1.7947864532470703, 20.58880615234375, -9.5040283203125, -0.5188884735107422, 4.4639892578125, -0.219451904296875, -3.08062744140625, 6.7705078125, -8.785049438476562, 3.64361572265625, -3.88037109375, -0.86328125, 15.55108642578125, -2.9454345703125, 7.512298583984375, 0.0, -1.3986682891845703, 12.314422607421875, -0.5967330932617188, 7.689422607421875, 20.30859375, 2.67242431640625, -0.45111083984375, 0.14520263671875, 1.25469970703125, -0.947052001953125, 8.139541625976562, 4.5272674560546875, 8.225654602050781, 0.5912189483642578, 2.3931121826171875, 3.6082763671875, -0.701416015625, 2.7281875610351562, 0.00705718994140625, 7.929779052734375, -10.84381103515625, -3.1846771240234375, -3.01068115234375, 1.1569538116455078, 3.761444091796875, -4.0849609375, -1.16815185546875, 12.962982177734375, 10.46099853515625, -4.875892639160156, 3.2677459716796875, -2.0120315551757812, 3.5948562622070312, 4.295101165771484, 8.082061767578125, 4.625, 15.642578125, -3.298614501953125, 2.3218231201171875, -0.4290924072265625, 3.471771240234375, -1.5431060791015625, 3.0253219604492188, 1.824493408203125, 11.076614379882812, -1.003997802734375, -6.787261962890625, 0.094970703125, 5.8349456787109375, 10.74444580078125, -4.86566162109375, -0.8023452758789062, 8.128173828125, 0.838714599609375, 3.1861114501953125, 9.488037109375, 17.77587890625, -14.8109130859375, 2.14630126953125, 5.5545654296875, 0.18337249755859375, 3.8525390625, 2.3487205505371094, 1.830718994140625, 6.845672607421875, 21.53887939453125, 12.948326110839844, 4.169742584228516, 3.2309112548828125, -3.524688720703125, 15.16064453125, 4.376495361328125, 5.43798828125, -0.488677978515625, -9.986892700195312, 9.146209716796875, 1.601959228515625, 12.229736328125, -5.95404052734375, 1.387552261352539, 12.072845458984375, 10.888153076171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000063.npy"}
{"epoch": 0.1319371727748691, "step": 64, "batch_size": 128, "mean": 4.2558135986328125, "std": 8.60974407196045, "min": -22.74169921875, "p10": -4.927630615234375, "median": 3.7685890197753906, "p90": 14.061410522460937, "max": 29.20745849609375, "pos_frac": 0.6875, "sample": [8.25872802734375, 2.79833984375, -20.8499755859375, 18.67559814453125, 10.8919677734375, 3.4915924072265625, 9.062255859375, 1.8604049682617188, 28.051513671875, 6.050559997558594, 1.16729736328125, -5.78070068359375, 5.878936767578125, 5.2535858154296875, 6.29296875, 1.571929931640625, 0.985992431640625, -3.8416748046875, 2.5812225341796875, -6.270416259765625, -18.2047119140625, 5.792755126953125, -2.909393310546875, -5.868682861328125, -3.0956077575683594, 14.613616943359375, -0.3313713073730469, -1.1914386749267578, -0.4622955322265625, 16.844482421875, 3.758636474609375, -2.7697296142578125, -2.4689483642578125, 10.11859130859375, 3.6153564453125, 7.04571533203125, -1.1541748046875, 6.653800964355469, 4.786521911621094, 0.7747344970703125, 9.391082763671875, 5.3761749267578125, -0.4661102294921875, -2.4499359130859375, -2.4834957122802734, 3.965423583984375, 2.268230438232422, -2.3399505615234375, -5.19219970703125, 10.4183349609375, -2.47076416015625, 6.282196044921875, 2.7218017578125, 14.260955810546875, 3.685638427734375, 0.0, 5.234039306640625, 18.218963623046875, 2.9835205078125, 1.7554702758789062, -3.4746246337890625, 22.786468505859375, -5.033843994140625, 2.2952041625976562, 26.33355712890625, 8.219497680664062, 8.689010620117188, 6.41876220703125, 10.52581787109375, -1.4585342407226562, -3.23126220703125, 8.624237060546875, 1.7280426025390625, -15.454788208007812, 2.6067581176757812, 3.7785415649414062, -9.640045166015625, 5.991004943847656, 11.371803283691406, 5.373931884765625, 13.84722900390625, 10.130355834960938, -0.516387939453125, 13.3096923828125, 0.10630607604980469, 0.6360492706298828, 6.359039306640625, 11.878662109375, 3.5568199157714844, -2.8499755859375, 10.838592529296875, 29.20745849609375, 0.0, 12.54461669921875, 12.98394775390625, 7.9976959228515625, 15.748565673828125, 7.257843017578125, 4.036430358886719, -10.568267822265625, 5.376251220703125, 23.414398193359375, 18.3836669921875, -3.06768798828125, 9.472015380859375, 13.527130126953125, 4.20098876953125, -3.079437255859375, 13.97589111328125, 8.165451049804688, -5.048828125, 4.02313232421875, 7.2792205810546875, 0.5445556640625, -1.2290000915527344, -0.9853057861328125, 10.596633911132812, -4.882110595703125, 9.363006591796875, 0.8979225158691406, 7.22015380859375, 18.02685546875, 3.6056060791015625, -5.80596923828125, 11.500999450683594, 7.7680816650390625, -22.74169921875, -1.549346923828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000064.npy"}
{"epoch": 0.13403141361256546, "step": 65, "batch_size": 128, "mean": 3.272204637527466, "std": 7.552777290344238, "min": -17.018646240234375, "p10": -4.727135467529296, "median": 2.408853530883789, "p90": 13.75512237548828, "max": 21.535003662109375, "pos_frac": 0.6640625, "sample": [-1.9454803466796875, -5.21038818359375, 6.5404052734375, 1.1368026733398438, -1.9539794921875, 9.456146240234375, 20.37481689453125, -2.423828125, 15.469757080078125, -1.04248046875, 2.413745880126953, -0.4638671875, 13.34259033203125, 4.17041015625, 2.3736419677734375, 11.069305419921875, 4.829437255859375, -1.343109130859375, 1.0448455810546875, -8.416488647460938, 5.5344390869140625, 7.133941650390625, -0.8134994506835938, 11.61651611328125, -11.763275146484375, -1.23382568359375, 0.0, -3.8552093505859375, 6.3892669677734375, 2.403961181640625, -8.23358154296875, -3.531757354736328, 14.018722534179688, 5.153993606567383, 1.1190834045410156, 11.011810302734375, 0.2935371398925781, 5.6092529296875, 0.03331756591796875, 1.7821617126464844, -2.218719482421875, -1.2047157287597656, 6.0027008056640625, 13.64215087890625, -12.7623291015625, 6.916473388671875, 3.5497283935546875, -6.50030517578125, -8.02374267578125, 0.216094970703125, 9.453155517578125, 10.62542724609375, 9.022552490234375, 9.154296875, -15.52789306640625, 4.0666046142578125, 18.0201416015625, 0.16876220703125, -5.6660003662109375, 3.4900665283203125, 9.066360473632812, -17.018646240234375, 0.622344970703125, 5.3900299072265625, 11.2991943359375, -10.2548828125, -0.302490234375, -12.520843505859375, 1.465179443359375, -4.1280364990234375, -3.90234375, 7.5318756103515625, 2.6426963806152344, 2.6372528076171875, 9.21856689453125, 2.3169631958007812, 11.0084228515625, 3.6731414794921875, 0.8369865417480469, 6.4680023193359375, -2.9814453125, 3.9059295654296875, 7.426361083984375, 7.642597198486328, 3.409912109375, 8.268997192382812, -1.1902618408203125, 17.33990478515625, 16.175079345703125, 5.4035491943359375, 8.652587890625, 21.535003662109375, -6.683929443359375, -4.0318450927734375, 14.5887451171875, 5.636199951171875, 4.509033203125, 7.550079345703125, -4.520027160644531, 14.49896240234375, 18.9215087890625, 0.5974578857421875, 11.91241455078125, 4.4061279296875, -4.5067138671875, 0.06826019287109375, 7.9481048583984375, -2.1138153076171875, -2.95001220703125, -1.9278564453125, 0.7340087890625, -1.4471549987792969, 0.3589935302734375, 0.9772567749023438, -0.712615966796875, -0.28566741943359375, 16.002227783203125, 13.49200439453125, 2.5616378784179688, 0.07305908203125, 14.88360595703125, 10.867431640625, 19.365234375, -4.458526611328125, 7.0361328125, -0.7080898284912109, 1.4696197509765625, -1.4232330322265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000065.npy"}
{"epoch": 0.13612565445026178, "step": 66, "batch_size": 128, "mean": 4.085182189941406, "std": 10.273005485534668, "min": -20.556732177734375, "p10": -7.436889648437499, "median": 3.1535987854003906, "p90": 19.145687866210938, "max": 30.70721435546875, "pos_frac": 0.6640625, "sample": [6.470367431640625, 1.402212142944336, 22.952972412109375, -6.90826416015625, 3.1615447998046875, -13.97607421875, 4.792716979980469, -0.4856758117675781, -1.898488998413086, 2.4535369873046875, 26.332275390625, 7.648746490478516, 25.535491943359375, 1.50958251953125, -12.92999267578125, 0.6469650268554688, 17.39727783203125, -17.599807739257812, 6.2335052490234375, -6.27532958984375, -6.9598236083984375, -9.092758178710938, 7.08892822265625, 3.1070556640625, 5.995330810546875, 6.484437942504883, -2.913818359375, 11.8758544921875, 1.5987586975097656, -14.24517822265625, 1.213531494140625, 14.48297119140625, -2.2649307250976562, 15.98748779296875, -5.96392822265625, -0.6885147094726562, 3.7791748046875, 27.42498779296875, 3.521759033203125, -3.5302276611328125, -2.8419189453125, -14.733306884765625, 8.514335632324219, 10.766799926757812, 2.85003662109375, 10.365234375, 22.84881591796875, 1.80682373046875, 8.968719482421875, -1.3541488647460938, -5.08758544921875, -1.213623046875, 20.990875244140625, 2.6758575439453125, 1.03131103515625, 0.0, -4.0162200927734375, 9.283317565917969, 5.087554931640625, 3.1456527709960938, 1.6093597412109375, 23.052490234375, 8.428680419921875, 4.522125244140625, 7.608345031738281, 1.51446533203125, -1.172891616821289, -2.6778602600097656, -12.594970703125, 10.753128051757812, 20.945098876953125, 3.847625732421875, -4.73309326171875, 9.469894409179688, 2.1125335693359375, 1.1693115234375, 8.698471069335938, 10.275466918945312, -20.556732177734375, 2.2186431884765625, 30.70721435546875, -6.481597900390625, 13.208648681640625, 4.0489044189453125, 5.926994323730469, -0.9788742065429688, -1.91046142578125, 20.18658447265625, 10.6092529296875, 13.775192260742188, 3.5869064331054688, 5.32440185546875, -7.242340087890625, -0.28790283203125, 14.70855712890625, -8.42758560180664, 3.9683303833007812, 9.6737060546875, 4.6226348876953125, -0.3881988525390625, 3.842121124267578, 6.5958099365234375, -12.95343017578125, 29.35638427734375, -2.3903427124023438, 8.585983276367188, 14.3692626953125, -0.9717559814453125, 5.0370330810546875, 9.189056396484375, 19.357879638671875, 8.33319091796875, 8.959197998046875, -2.247407913208008, 19.05474853515625, -16.614593505859375, 6.650848388671875, -7.890838623046875, 8.73028564453125, 1.9102783203125, 2.1875, -18.30072021484375, 1.6404876708984375, -5.4332427978515625, 1.671966552734375, -1.2768173217773438, 28.37939453125, 15.557403564453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000066.npy"}
{"epoch": 0.1382198952879581, "step": 67, "batch_size": 128, "mean": 3.709660053253174, "std": 8.975553512573242, "min": -19.835784912109375, "p10": -7.1272232055664055, "median": 3.3084068298339844, "p90": 15.249703979492187, "max": 27.3741455078125, "pos_frac": 0.6875, "sample": [1.83953857421875, 17.28851318359375, 4.8524169921875, 25.839599609375, -2.970306396484375, 7.023567199707031, 14.27679443359375, -6.3806610107421875, 3.8182525634765625, 17.081298828125, 4.070220947265625, -6.1643829345703125, -7.8955078125, 0.07865715026855469, -13.642547607421875, 20.96136474609375, 9.276344299316406, 0.12506103515625, 3.1431808471679688, -10.51904296875, -0.667633056640625, 3.9587249755859375, 7.791923522949219, 11.069091796875, 11.100784301757812, -6.261432647705078, 11.83209228515625, -3.855010986328125, 7.7426605224609375, -2.445240020751953, 0.08687591552734375, 18.7984619140625, -1.0863456726074219, -11.83575439453125, 2.3306961059570312, 11.1279296875, 0.0, 2.1799163818359375, 11.092742919921875, 3.8236122131347656, 3.37347412109375, 0.19512557983398438, 15.422119140625, -2.2589874267578125, 2.3037796020507812, 10.91583251953125, 1.79913330078125, 3.5173568725585938, 1.8199462890625, 3.543987274169922, 9.5933837890625, -5.5545654296875, 14.597442626953125, -0.3336944580078125, 0.4090118408203125, -1.660980224609375, -10.633331298828125, -0.5075569152832031, 14.440750122070312, -6.7979583740234375, 2.18377685546875, 2.983673095703125, 4.376953125, -2.0886077880859375, 9.88934326171875, -6.53033447265625, 3.8643875122070312, 23.414306640625, -2.9574432373046875, 9.018463134765625, 4.5925445556640625, 13.8521728515625, 4.1241455078125, 11.369552612304688, -13.2286376953125, 6.011688232421875, 19.304534912109375, -13.260406494140625, 1.529815673828125, 5.771722793579102, -4.026357650756836, 16.688934326171875, -19.835784912109375, 27.3741455078125, 1.4767913818359375, -1.7888336181640625, -4.155082702636719, -17.730194091796875, 2.0568084716796875, 15.048049926757812, 15.292022705078125, -14.9150390625, 5.009918212890625, -0.20686721801757812, 2.6998291015625, 3.263275146484375, 4.523719787597656, 11.89300537109375, 18.302978515625, 10.0762939453125, 3.3535385131835938, -1.1576118469238281, 8.30474853515625, -4.85369873046875, 7.268196105957031, -0.3642578125, -13.2076416015625, 1.3458690643310547, 6.392333984375, 9.553680419921875, 13.99755859375, 5.1703033447265625, 15.2315673828125, 10.392608642578125, 2.4891815185546875, -1.3009185791015625, -8.594757080078125, 3.0291213989257812, 8.347488403320312, 9.95599365234375, 8.772613525390625, 15.645034790039062, -10.59918212890625, 11.221359252929688, 2.41876220703125, -6.649635314941406, 7.86846923828125, 2.4657821655273438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000067.npy"}
{"epoch": 0.14031413612565444, "step": 68, "batch_size": 128, "mean": 4.682193756103516, "std": 11.019929885864258, "min": -24.007293701171875, "p10": -7.222297668457031, "median": 3.5012588500976562, "p90": 16.86950073242187, "max": 38.8338623046875, "pos_frac": 0.6875, "sample": [10.45379638671875, 1.4722557067871094, -0.1689453125, -4.56304931640625, 36.736663818359375, 14.174041748046875, 0.14849281311035156, 20.40826416015625, 0.8148574829101562, 3.30194091796875, 4.755279541015625, 9.213577270507812, 16.3800048828125, 12.23138427734375, 3.625885009765625, -6.3863067626953125, 5.281229019165039, -1.1919326782226562, -18.209625244140625, 19.096954345703125, -4.8412322998046875, -9.56097412109375, 9.537841796875, 11.077743530273438, -6.9619140625, 7.0117950439453125, 1.5058860778808594, 3.98162841796875, 12.58111572265625, 1.4573478698730469, -7.3395233154296875, 3.5163116455078125, -4.364940643310547, 0.2405681610107422, 0.6774139404296875, 14.796295166015625, -2.7111053466796875, -2.951812744140625, -10.39068603515625, -13.9915771484375, 15.233169555664062, 2.4110488891601562, -6.48681640625, -9.05987548828125, -0.87371826171875, 12.648468017578125, 31.092437744140625, -6.00885009765625, -2.931884765625, 38.8338623046875, 0.5314102172851562, -9.8231201171875, 14.841537475585938, -0.3366584777832031, 2.4584197998046875, 13.434783935546875, 4.053955078125, 4.600311279296875, -7.17205810546875, 14.968536376953125, 13.352264404296875, 2.24859619140625, 0.3509979248046875, 7.3942108154296875, -4.493522644042969, 4.5070343017578125, 5.736480712890625, -2.2991714477539062, 3.4862060546875, -1.3295211791992188, 10.181861877441406, 0.34033203125, 37.895843505859375, 4.9019775390625, 4.547161102294922, 12.07562255859375, 10.697540283203125, 19.1915283203125, -1.612884521484375, 4.7138519287109375, 9.09600830078125, 1.8401718139648438, -12.57122802734375, 5.4703826904296875, 14.76953125, 6.43896484375, 15.497573852539062, 18.01165771484375, -9.5260009765625, 4.8673858642578125, 8.518829345703125, 2.9333419799804688, 4.11273193359375, -4.629913330078125, 3.14031982421875, 36.390625, -2.752471923828125, 18.927642822265625, 15.524322509765625, -6.78985595703125, 3.1451568603515625, -24.007293701171875, -0.7985305786132812, 5.3541107177734375, -0.057804107666015625, -17.56390380859375, 8.617996215820312, 19.34210205078125, 12.3631591796875, 6.6555938720703125, 1.040496826171875, 11.137588500976562, -3.66363525390625, 3.7444610595703125, 37.356201171875, -2.0384979248046875, 1.950836181640625, 8.990219116210938, 11.53912353515625, -9.840744018554688, 5.5950927734375, 0.6807861328125, 2.051788330078125, 18.30072021484375, 6.88555908203125, -9.844894409179688, -2.5066452026367188, 0.475433349609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000068.npy"}
{"epoch": 0.1424083769633508, "step": 69, "batch_size": 128, "mean": 6.031038284301758, "std": 10.060450553894043, "min": -24.5926513671875, "p10": -5.804940795898437, "median": 4.51031494140625, "p90": 18.630422973632808, "max": 32.05230712890625, "pos_frac": 0.7578125, "sample": [10.400337219238281, 5.364501953125, 6.798564910888672, 11.34381103515625, 3.1095046997070312, 1.6650543212890625, 4.947509765625, 0.23286819458007812, 22.52294921875, 1.4027366638183594, 11.72052001953125, -1.518035888671875, 7.086631774902344, -2.219440460205078, 18.288116455078125, -2.12200927734375, -2.0969390869140625, 9.417083740234375, 4.285552978515625, 12.062911987304688, -0.27811431884765625, 2.7449188232421875, 25.337066650390625, 1.5542106628417969, -4.447864532470703, -10.670135498046875, 8.331426620483398, 2.4286231994628906, 3.937652587890625, 26.966552734375, 17.763214111328125, -5.799163818359375, 4.291748046875, -12.999176025390625, 2.2292404174804688, 7.283855438232422, 1.3035831451416016, -3.8045196533203125, 21.5906982421875, -6.065826416015625, -2.7158050537109375, 1.1814441680908203, -5.663993835449219, -5.224822998046875, 10.822357177734375, 20.9730224609375, 9.10302734375, -10.49420166015625, 3.659759521484375, 13.22625732421875, 19.42913818359375, 9.768402099609375, 11.74761962890625, 9.771942138671875, 30.743255615234375, 9.98828125, -1.724578857421875, 0.2027740478515625, 10.73614501953125, 7.413703918457031, -4.8472900390625, -8.460845947265625, 13.745185852050781, 8.245712280273438, 9.5955810546875, 2.388824462890625, 12.217361450195312, 4.155506134033203, 32.05230712890625, -15.410888671875, 1.2087593078613281, 4.7288818359375, 10.7396240234375, 29.41937255859375, 11.02569580078125, 16.82269287109375, -14.196868896484375, 2.859161376953125, 11.01116943359375, 1.8906002044677734, 0.552490234375, -0.520599365234375, 0.08599853515625, -4.1110992431640625, 9.626983642578125, 17.123504638671875, -7.531005859375, 11.28607177734375, 13.1011962890625, 23.731964111328125, 14.567138671875, 29.244857788085938, 12.125579833984375, 2.527679443359375, 2.3718299865722656, 0.525421142578125, 26.874908447265625, 1.6852645874023438, 21.123046875, 0.08010482788085938, 1.931732177734375, 6.858308792114258, -5.83447265625, 10.4327392578125, -0.4626808166503906, 17.423736572265625, -24.5926513671875, 12.718902587890625, -5.81842041015625, 10.5625, 2.5555572509765625, 14.974990844726562, 17.86480712890625, 3.457164764404297, 14.844451904296875, 10.293754577636719, 0.589996337890625, 3.9293975830078125, -5.9212799072265625, -4.46636962890625, 8.026412963867188, 10.5272216796875, -4.501518249511719, -8.11065673828125, 2.884540557861328, 10.108856201171875, 12.99627685546875, 7.7037811279296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000069.npy"}
{"epoch": 0.14450261780104712, "step": 70, "batch_size": 128, "mean": 5.940929412841797, "std": 11.418109893798828, "min": -27.67559814453125, "p10": -4.731953048706052, "median": 4.040836334228516, "p90": 19.68597717285156, "max": 42.8958740234375, "pos_frac": 0.703125, "sample": [-16.230224609375, 12.369491577148438, 4.588592529296875, -3.7508697509765625, 3.16107177734375, -9.205535888671875, -0.900390625, 3.619964599609375, 12.2198486328125, 5.17596435546875, -0.00209808349609375, 11.3486328125, -3.4126968383789062, -12.55230712890625, 0.08502197265625, -3.639434814453125, 4.968505859375, 13.29656982421875, 6.480144500732422, 5.10430908203125, 18.559539794921875, 14.7034912109375, 8.736099243164062, 34.63873291015625, 29.1932373046875, 10.72705078125, 4.461708068847656, 14.01458740234375, 1.9310073852539062, 12.561264038085938, -26.571685791015625, -27.67559814453125, 1.153228759765625, 1.62255859375, 7.820526123046875, 7.392280578613281, 17.64520263671875, -9.228263854980469, -12.467071533203125, -0.8874359130859375, 1.6553878784179688, 19.079010009765625, 2.8270416259765625, 2.3232669830322266, -10.16668701171875, -0.5274200439453125, -3.3255081176757812, 21.81243896484375, 3.151447296142578, -3.186309814453125, -7.79791259765625, 2.1056365966796875, 5.27880859375, 30.8837890625, 2.34027099609375, 1.5479507446289062, 11.391265869140625, -0.855377197265625, 0.867645263671875, 1.82147216796875, -3.9262847900390625, -0.33853912353515625, 4.46673583984375, 13.37017822265625, -14.425537109375, 31.89959716796875, -6.475376129150391, 0.4770164489746094, 1.7786750793457031, -1.018350601196289, 27.80596923828125, 5.941200256347656, -13.677490234375, 15.254508972167969, 2.921630859375, 13.0257568359375, 11.5384521484375, -0.51708984375, 8.433074951171875, 0.9336872100830078, -1.533426284790039, 9.501556396484375, 3.0973434448242188, 18.34991455078125, 8.096733093261719, -3.984771728515625, 21.334075927734375, 2.40350341796875, -2.354084014892578, 31.62078857421875, 20.61773681640625, 18.929473876953125, 17.409088134765625, 10.525520324707031, 2.6425933837890625, -2.4783992767333984, -2.8126068115234375, 15.23468017578125, 24.57879638671875, 10.056793212890625, 8.668266296386719, -0.049530029296875, 5.548973083496094, 14.722808837890625, 6.163185119628906, 1.0063934326171875, 22.903167724609375, 10.004524230957031, 9.882171630859375, 13.853729248046875, -0.9212188720703125, 8.67535400390625, 8.088226318359375, 11.670967102050781, 24.39434814453125, 17.2708740234375, 9.202972412109375, -7.4155731201171875, -3.1567001342773438, -3.6750030517578125, 0.0, 1.4807510375976562, 17.78448486328125, 19.286651611328125, 42.8958740234375, 0.0, 0.6306610107421875, 0.53826904296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000070.npy"}
{"epoch": 0.14659685863874344, "step": 71, "batch_size": 128, "mean": 5.065265655517578, "std": 12.88088607788086, "min": -39.84515380859375, "p10": -10.009959411621093, "median": 5.6278076171875, "p90": 22.913798522949214, "max": 35.032440185546875, "pos_frac": 0.7109375, "sample": [-4.2850799560546875, 30.765960693359375, 0.29988861083984375, -13.64373779296875, 7.230072021484375, 29.968597412109375, -21.199615478515625, 10.23529052734375, 1.510986328125, 6.5909423828125, 10.814697265625, 17.89898681640625, -9.67242431640625, -2.205230712890625, 7.622222900390625, 21.8563232421875, 1.0890636444091797, 7.18109130859375, 10.285457611083984, -13.615966796875, 0.067474365234375, 17.439422607421875, 13.383186340332031, 10.86358642578125, -12.164031982421875, -6.005401611328125, 1.050018310546875, 7.888404846191406, 1.3133544921875, 12.801475524902344, 7.818511962890625, 7.481954574584961, 7.455596923828125, -25.024139404296875, -5.028343200683594, 25.70135498046875, 17.308624267578125, 0.028041839599609375, 24.601104736328125, -15.678573608398438, 6.0318756103515625, -10.269073486328125, 7.39935302734375, 33.0380859375, -1.6243209838867188, 7.975273132324219, 4.258380889892578, 13.147064208984375, 11.9730224609375, 2.1639175415039062, 0.008716583251953125, 27.12579345703125, 10.3450927734375, -14.887222290039062, 0.5199508666992188, 14.2696533203125, 11.59075927734375, 21.334075927734375, -1.474517822265625, 8.69586181640625, 10.155570983886719, 21.6578369140625, 2.0560073852539062, 9.215682983398438, -20.114181518554688, 10.376693725585938, -0.40090179443359375, 7.083221435546875, 0.86956787109375, -1.270477294921875, -10.7686767578125, 5.371213912963867, 6.7006988525390625, -1.6779632568359375, 5.107421875, 27.832839965820312, 24.55084228515625, 3.5321197509765625, -1.9096107482910156, -9.6759033203125, 10.92840576171875, -9.71588134765625, -5.607219696044922, 17.067962646484375, 15.91070556640625, -2.37408447265625, 4.69384765625, 4.871950149536133, 13.3731689453125, 8.764068603515625, 6.293628692626953, 6.07855224609375, 4.4223785400390625, 6.7806396484375, 9.977874755859375, 5.65203857421875, -8.89306640625, 0.1180419921875, 5.429252624511719, 14.469818115234375, 1.5728073120117188, -0.18877792358398438, 5.60357666015625, -39.84515380859375, 18.38127899169922, 12.846893310546875, -11.126678466796875, 3.1551284790039062, 9.133941650390625, -7.42791748046875, 23.887908935546875, 35.032440185546875, 24.78466796875, -7.872802734375, -9.898910522460938, 3.3031063079833984, -9.34698486328125, 26.863418579101562, 9.509376525878906, 4.315244674682617, -2.001373291015625, -0.43492984771728516, 2.715087890625, 24.838302612304688, -0.9757328033447266, 22.496322631835938, -37.52886962890625, 7.9776153564453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000071.npy"}
{"epoch": 0.1486910994764398, "step": 72, "batch_size": 128, "mean": 5.063143730163574, "std": 11.231489181518555, "min": -35.2314453125, "p10": -6.7671661376953125, "median": 4.304531097412109, "p90": 19.859136962890624, "max": 30.5203857421875, "pos_frac": 0.6953125, "sample": [13.257537841796875, 7.291717529296875, -2.2533416748046875, -7.33221435546875, -5.789947509765625, 0.47078704833984375, 6.7772369384765625, 6.23687744140625, -8.329517364501953, 4.181732177734375, -15.47760009765625, 4.29156494140625, -22.5264892578125, -9.51910400390625, -2.0863876342773438, 7.91082763671875, 7.4931488037109375, 13.3416748046875, 5.079917907714844, 3.8837127685546875, 17.334548950195312, 3.048004150390625, 8.322418212890625, 13.677978515625, 4.9196319580078125, 3.7447242736816406, -6.134204864501953, -1.505096435546875, -4.0934906005859375, -35.2314453125, 25.38555908203125, 0.815826416015625, 28.5889892578125, -2.19757080078125, 7.551887512207031, 17.5447998046875, 1.427093505859375, -3.9906578063964844, 15.080093383789062, 5.7744903564453125, 13.024078369140625, 20.8472900390625, 3.023193359375, 20.420196533203125, -15.4510498046875, 0.0, 20.7432861328125, 17.0538330078125, 0.7381572723388672, 3.70098876953125, -6.7787017822265625, 5.74772834777832, 10.860809326171875, 25.242095947265625, 0.0, 19.41070556640625, 3.1763458251953125, -5.05926513671875, 10.811569213867188, 3.5970458984375, -3.304180145263672, 0.0, 4.593250274658203, 14.065582275390625, 2.62969970703125, 1.5452022552490234, 2.7931060791015625, -0.4327392578125, 6.026092529296875, -6.710205078125, 19.618682861328125, 2.17596435546875, 7.165618896484375, 3.2363338470458984, -6.0330657958984375, 10.560165405273438, 22.755035400390625, -2.1322708129882812, 28.55841064453125, 16.40179443359375, 5.699188232421875, 9.334465026855469, 20.50250244140625, 4.317497253417969, -1.0877876281738281, -1.2530136108398438, 28.314849853515625, 30.5203857421875, 0.4877605438232422, 13.184539794921875, -13.197845458984375, 4.8687744140625, 0.5436058044433594, 9.736312866210938, -19.964324951171875, 23.383209228515625, 1.1007080078125, 5.55657958984375, 16.29217529296875, 18.20068359375, 12.566085815429688, 11.904220581054688, 4.627189636230469, -2.527435302734375, -6.549003601074219, -0.954620361328125, 7.2975921630859375, -30.794158935546875, -2.8014602661132812, -6.7622222900390625, 4.096523284912109, 16.95208740234375, 2.9264450073242188, 9.8382568359375, -8.5638427734375, 16.604045867919922, 11.932418823242188, 8.31658935546875, 1.040679931640625, -1.6351470947265625, -0.3856201171875, 6.847076416015625, 3.882232666015625, 14.795162200927734, 8.55499267578125, -7.07989501953125, 14.3031005859375, 21.524368286132812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000072.npy"}
{"epoch": 0.15078534031413612, "step": 73, "batch_size": 128, "mean": 4.708438873291016, "std": 12.766937255859375, "min": -53.943511962890625, "p10": -11.256513214111328, "median": 4.676198959350586, "p90": 20.037882995605468, "max": 36.0430908203125, "pos_frac": 0.6875, "sample": [6.218769073486328, 6.9722900390625, 13.82440185546875, 15.271194458007812, 15.672698974609375, 30.083587646484375, -0.456390380859375, 5.7704620361328125, -27.0174560546875, -15.287139892578125, -1.5072860717773438, 1.943603515625, 7.674522399902344, 31.429168701171875, -11.60797119140625, -4.11834716796875, -9.01727294921875, 16.334442138671875, 11.599300384521484, 0.525177001953125, 3.6420822143554688, 7.11114501953125, -18.066925048828125, 15.320556640625, 8.74920654296875, 19.9495849609375, 12.09088134765625, 0.987823486328125, 5.856941223144531, -2.0554275512695312, 4.184761047363281, -1.45458984375, -9.26104736328125, 5.170478820800781, -12.385162353515625, 0.5516357421875, -11.105888366699219, 32.96142578125, 14.455459594726562, 6.47021484375, 3.9996337890625, 1.6571426391601562, 0.65582275390625, 8.7923583984375, 20.423324584960938, -53.943511962890625, 23.203826904296875, 3.9715499877929688, 23.623626708984375, -0.34862709045410156, -2.749980926513672, -13.730743408203125, -3.08282470703125, 17.194290161132812, 9.91949462890625, 13.51531982421875, -6.1256256103515625, 13.419586181640625, -2.678009033203125, 3.9050636291503906, 3.97723388671875, 36.0430908203125, -8.711639404296875, -3.2779083251953125, 1.3999481201171875, 14.776176452636719, 5.98223876953125, 12.039276123046875, 12.748382568359375, 4.873828887939453, -1.440765380859375, -0.5048446655273438, 10.27252197265625, 2.0711898803710938, 15.61538314819336, 3.2244949340820312, 4.9398651123046875, -18.998275756835938, 20.243911743164062, -12.421951293945312, -2.178314208984375, 7.1602020263671875, -1.62109375, 0.7426681518554688, 10.9713134765625, 12.222076416015625, -0.8336334228515625, 4.35394287109375, 2.0835418701171875, 12.410400390625, 5.361215591430664, 8.842269897460938, 26.75079345703125, 3.4581375122070312, 10.723297119140625, 7.148712158203125, 3.34796142578125, 24.687744140625, -1.1417007446289062, -1.3468093872070312, 31.793701171875, 0.0611114501953125, -5.17791748046875, -6.68536376953125, 8.442352294921875, 13.059097290039062, -16.88458251953125, -18.538589477539062, 0.26654052734375, 7.402099609375, 4.226104736328125, 5.3817138671875, -14.421340942382812, -13.905242919921875, -3.5519371032714844, 4.478569030761719, 31.23785400390625, 16.116363525390625, 12.8125, -5.993927001953125, 22.13470458984375, 13.23175048828125, 11.832000732421875, 16.15966796875, 6.363468170166016, 7.307342529296875, 11.260498046875, -10.82586669921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000073.npy"}
{"epoch": 0.15287958115183245, "step": 74, "batch_size": 128, "mean": 4.466212272644043, "std": 12.129538536071777, "min": -24.811065673828125, "p10": -11.907862854003906, "median": 3.6120338439941406, "p90": 20.084454345703126, "max": 39.58453369140625, "pos_frac": 0.65625, "sample": [2.30523681640625, 20.0042724609375, -4.3092041015625, 1.5985107421875, -5.57476806640625, -3.6903076171875, 18.621566772460938, 19.223846435546875, 17.6141357421875, 27.610931396484375, -13.419601440429688, -5.4106597900390625, -2.300811767578125, 3.5478744506835938, -12.489349365234375, -21.40643310546875, 12.03082275390625, 0.275054931640625, 14.4014892578125, 5.3129119873046875, 11.882232666015625, -15.35736083984375, -7.553741455078125, -4.003753662109375, 8.317581176757812, 0.5789260864257812, 5.5525665283203125, -24.811065673828125, 8.722625732421875, 4.02679443359375, 33.8927001953125, 3.8359603881835938, 18.703292846679688, -0.23626708984375, -14.48577880859375, 2.03070068359375, -3.6155471801757812, 9.471855163574219, 0.4685516357421875, 13.846939086914062, 1.641510009765625, 10.224594116210938, 3.3856201171875, 9.181137084960938, 8.26885986328125, 10.448150634765625, -2.018280029296875, -4.000112533569336, 5.3458251953125, 39.58453369140625, 10.8775634765625, 2.1168212890625, 13.6748046875, -5.55572509765625, 7.8342132568359375, 21.0855712890625, 15.1932373046875, 3.0180816650390625, 3.843017578125, 6.65081787109375, 5.402362823486328, 11.357925415039062, 13.419296264648438, -1.92840576171875, 12.317436218261719, 10.433349609375, -7.5018310546875, 21.897415161132812, -0.6858844757080078, -7.373046875, -11.36083984375, -17.129913330078125, 24.5162353515625, -21.570709228515625, -14.40411376953125, -0.2802276611328125, 3.3904800415039062, 3.1312103271484375, -23.037078857421875, 20.27154541015625, -0.6368255615234375, 0.84149169921875, 7.258209228515625, 3.6761932373046875, 12.87738037109375, 12.039657592773438, 13.750991821289062, -3.747344970703125, 2.3328685760498047, -3.094440460205078, -0.40411376953125, -4.44720458984375, 14.14422607421875, 7.0965576171875, 7.039649963378906, 21.946319580078125, 0.0, 11.929443359375, -1.858734130859375, 25.72113037109375, -0.6328811645507812, 19.772796630859375, 1.189666748046875, -8.188720703125, -20.05181884765625, -11.853256225585938, -7.37969970703125, -16.57415771484375, 5.553466796875, 7.03668212890625, -2.69970703125, 20.3359375, 7.4132080078125, 5.69775390625, -7.1978607177734375, 21.037078857421875, 12.363861083984375, -12.0352783203125, 2.7563018798828125, 2.9870529174804688, 3.0111083984375, 14.30029296875, 12.182052612304688, 9.22930908203125, 33.13824462890625, 16.039520263671875, 1.051300048828125, 29.849334716796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000074.npy"}
{"epoch": 0.1549738219895288, "step": 75, "batch_size": 128, "mean": 6.661381244659424, "std": 11.486974716186523, "min": -28.47136688232422, "p10": -6.88671646118164, "median": 6.110267639160156, "p90": 21.06065979003906, "max": 36.72637939453125, "pos_frac": 0.7421875, "sample": [-4.300537109375, 7.546398162841797, 1.910980224609375, 3.896881103515625, 15.7784423828125, 6.3274078369140625, -0.7751235961914062, -7.407524108886719, 29.8555908203125, 0.76068115234375, 21.904754638671875, 10.774658203125, 9.111328125, -0.813720703125, 32.56787109375, 16.098861694335938, 2.888439178466797, 6.65435791015625, 21.496337890625, -3.044830322265625, 0.6491622924804688, 14.4107666015625, 2.724029541015625, 12.45064926147461, 25.458465576171875, 19.371841430664062, 13.303955078125, -14.320587158203125, 5.89312744140625, 10.127605438232422, 4.510169982910156, 20.2349853515625, -12.724090576171875, 12.157562255859375, 18.317214965820312, -8.38409423828125, 14.530364990234375, 7.60833740234375, 14.187240600585938, -1.25030517578125, 15.754119873046875, 16.537080764770508, 18.897003173828125, -18.81549072265625, 12.893058776855469, 11.387359619140625, 7.3997802734375, 13.671195983886719, 10.611915588378906, 16.11383056640625, 8.23095703125, 4.43914794921875, -14.990951538085938, 4.0998382568359375, 27.222198486328125, 9.8380126953125, -2.07537841796875, 22.595245361328125, 0.777923583984375, -10.36328125, -1.298828125, 0.13426971435546875, 14.213836669921875, -1.08203125, -3.869384765625, 5.000572204589844, 7.141883850097656, -16.35552978515625, 20.460418701171875, 11.802589416503906, 4.36370849609375, -16.05181884765625, -3.3681182861328125, 18.74224853515625, -6.66351318359375, 14.563034057617188, -11.74407958984375, 20.968658447265625, -4.3663330078125, 10.9354248046875, -0.2487049102783203, 36.72637939453125, 26.8282470703125, -0.7297534942626953, 19.2188720703125, 22.6683349609375, 0.9945716857910156, 19.143585205078125, 5.129600524902344, 8.681411743164062, 7.43548583984375, -2.9995880126953125, 4.3212432861328125, -3.727813720703125, 5.557044982910156, 17.01458740234375, 4.798343658447266, 1.2816486358642578, 21.27532958984375, -3.359140396118164, 9.960479736328125, 5.36370849609375, -0.9612998962402344, 1.8423614501953125, 2.82708740234375, 0.13958740234375, 11.07586669921875, 24.600372314453125, 12.5810546875, 5.007560729980469, 0.10565948486328125, 11.231063842773438, -17.629913330078125, -28.47136688232422, 2.495391845703125, 11.63455581665039, 1.2244873046875, -6.5516357421875, 17.734832763671875, 0.5818557739257812, 11.324005126953125, 13.604248046875, 17.378448486328125, 21.8109130859375, -1.5797119140625, 0.93310546875, -16.966278076171875, 3.1124114990234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000075.npy"}
{"epoch": 0.15706806282722513, "step": 76, "batch_size": 128, "mean": 7.146560192108154, "std": 13.240498542785645, "min": -28.41522216796875, "p10": -5.650607299804687, "median": 5.1301422119140625, "p90": 24.952600097656248, "max": 43.2115478515625, "pos_frac": 0.703125, "sample": [18.22607421875, -4.00750732421875, 9.09381103515625, 3.445098876953125, 7.767791748046875, 20.02978515625, -28.41522216796875, 37.609527587890625, -1.9278717041015625, -11.205596923828125, 6.6414794921875, 2.06207275390625, -5.30322265625, 19.5888671875, 0.0, 0.595855712890625, 11.218852996826172, 1.1491470336914062, -3.5410003662109375, 1.1171035766601562, -10.906684875488281, -4.8525390625, 5.48077392578125, -0.8449783325195312, 13.828643798828125, -0.7356586456298828, 4.140167236328125, 4.918212890625, 30.247024536132812, -4.123504638671875, -21.92108154296875, 1.8717498779296875, 2.2599411010742188, 11.446182250976562, 8.588348388671875, 5.641538619995117, 8.629135131835938, 15.925750732421875, 12.267364501953125, 9.74542236328125, 39.73828125, 1.5845184326171875, 1.16229248046875, -0.574981689453125, 5.342071533203125, 0.5495529174804688, 28.06103515625, -13.93682861328125, 13.161705017089844, -6.6904296875, 1.4748077392578125, 22.34002685546875, -8.9010009765625, 3.1892471313476562, 22.759124755859375, 30.004119873046875, 6.146858215332031, 7.3524017333984375, 12.464111328125, 23.822509765625, 19.8206787109375, 8.481201171875, 23.9345703125, 2.1865997314453125, 6.909511566162109, 4.54779052734375, 38.66650390625, -0.08890533447265625, 24.7171630859375, 22.148773193359375, 16.0831298828125, 17.491500854492188, 4.3394622802734375, 28.7545166015625, -2.876434326171875, 10.147003173828125, 43.2115478515625, -27.6341552734375, -5.894927978515625, -15.282470703125, -5.5458984375, -1.4226608276367188, -12.277862548828125, 0.0, 21.8763427734375, 26.155548095703125, 37.54026794433594, 3.79583740234375, 5.7360076904296875, 8.6578369140625, 10.344223022460938, 1.7418670654296875, 2.441791534423828, 10.18548583984375, 6.2096099853515625, 9.879669189453125, 9.12493896484375, -3.14947509765625, 15.87225341796875, 16.4715576171875, -4.57537841796875, 3.3298721313476562, 8.927459716796875, 5.442108154296875, 4.589500427246094, -0.765045166015625, 11.96051025390625, -2.290424346923828, 5.7926177978515625, 42.167449951171875, -7.1971435546875, 25.501953125, 20.4000244140625, 10.457618713378906, 14.266433715820312, 31.398895263671875, -0.290069580078125, -11.31640625, -0.636810302734375, -4.978923797607422, 1.8293380737304688, 11.344833374023438, -2.1682796478271484, 2.4201889038085938, -3.9653892517089844, 1.8451080322265625, -0.51019287109375, 3.6791534423828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000076.npy"}
{"epoch": 0.15916230366492146, "step": 77, "batch_size": 128, "mean": 5.6925764083862305, "std": 14.548723220825195, "min": -36.643524169921875, "p10": -10.440644836425781, "median": 4.224342346191406, "p90": 26.092977905273436, "max": 50.093963623046875, "pos_frac": 0.65625, "sample": [9.550209045410156, 2.680267333984375, 5.874809265136719, 12.723602294921875, 3.8875885009765625, 20.59869384765625, -0.77886962890625, -1.5930767059326172, 2.367340087890625, 14.454032897949219, -32.704315185546875, 14.634170532226562, -8.485565185546875, 5.149627685546875, 4.77386474609375, 0.22930908203125, -8.077392578125, 14.669830322265625, 5.159385681152344, 5.5507659912109375, -9.808975219726562, -15.979339599609375, 18.0528564453125, 2.955718994140625, -7.64300537109375, 18.18756103515625, -11.219718933105469, -15.561920166015625, -10.472015380859375, 45.53314208984375, 1.0583686828613281, 1.016845703125, 5.87811279296875, -0.8733901977539062, 19.9735107421875, 16.373321533203125, 7.5981903076171875, -2.3292236328125, 11.88427734375, 0.8773880004882812, -3.3387069702148438, -4.494873046875, -8.2830810546875, -12.44085693359375, 27.831695556640625, 50.093963623046875, 15.547592163085938, 9.895500183105469, 26.466583251953125, 6.813362121582031, 13.13671875, 7.941078186035156, -12.0400390625, 5.565948486328125, -0.632781982421875, 10.378173828125, 2.8604354858398438, 10.32427978515625, 31.260345458984375, -17.3623046875, -10.427200317382812, 10.871826171875, -4.567060470581055, 2.50018310546875, -3.7222366333007812, 16.549423217773438, -5.4935302734375, -0.6512603759765625, 1.0395050048828125, -2.9752731323242188, 27.253570556640625, 11.425689697265625, 32.13031005859375, 21.8416748046875, 15.296356201171875, -0.24594879150390625, -22.578399658203125, -0.437591552734375, 5.48828125, 0.7156734466552734, -36.643524169921875, -16.187820434570312, -9.614692687988281, -2.7537841796875, 15.413314819335938, 3.9972381591796875, -1.7960052490234375, 25.932861328125, -13.441055297851562, 16.049945831298828, 10.838233947753906, 4.0762481689453125, 3.799224853515625, 0.105865478515625, 24.73236083984375, 24.056884765625, 13.4251708984375, 16.662567138671875, 27.16705322265625, 9.829374313354492, 6.047142028808594, 11.166717529296875, 3.5162925720214844, 37.3038330078125, 14.348175048828125, 14.604827880859375, -5.275360107421875, 4.3724365234375, -6.0148468017578125, -9.136672973632812, 28.81005859375, 43.72576904296875, 10.6904296875, 3.4200210571289062, -2.752349853515625, 2.3849334716796875, 33.5224609375, -1.817779541015625, 5.909027099609375, -22.648162841796875, -5.217704772949219, 6.6466827392578125, 13.727462768554688, -7.277099609375, 13.9600830078125, 27.07244873046875, -8.903350830078125, 1.1118240356445312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000077.npy"}
{"epoch": 0.1612565445026178, "step": 78, "batch_size": 128, "mean": 8.773029327392578, "std": 14.792435646057129, "min": -24.178985595703125, "p10": -8.985476684570312, "median": 7.276054382324219, "p90": 29.62200927734375, "max": 62.60223388671875, "pos_frac": 0.7421875, "sample": [-3.4661712646484375, 11.511962890625, -15.99920654296875, 17.038002014160156, 24.65972900390625, 0.407379150390625, 20.6593017578125, -13.611541748046875, 5.135154724121094, 15.796066284179688, 29.549224853515625, 0.887481689453125, 20.425689697265625, 1.0606231689453125, 20.78472900390625, -14.2054443359375, 7.4416046142578125, -14.494377136230469, 13.937919616699219, 12.048370361328125, 2.763397216796875, -16.581817626953125, 18.72186279296875, 14.232177734375, -16.845733642578125, -6.179616928100586, 30.273117065429688, 29.791839599609375, 0.2618408203125, 26.491928100585938, 20.5625, 4.057365417480469, 33.803192138671875, 1.06268310546875, 10.885086059570312, 21.511962890625, 17.360549926757812, 34.655059814453125, -12.014312744140625, -24.178985595703125, 0.0, 25.8836669921875, -17.216903686523438, 10.794898986816406, 5.085075378417969, 6.968364715576172, 17.132568359375, -2.1943359375, 11.41168212890625, -2.454620361328125, 38.76568603515625, 14.259658813476562, -1.9989013671875, 2.471050262451172, -8.6168212890625, 38.563018798828125, -5.3719024658203125, 6.3928680419921875, 0.7574729919433594, 5.97760009765625, 24.52362060546875, 33.61309814453125, 23.715667724609375, -7.19683837890625, -11.287628173828125, 14.860511779785156, -1.2775497436523438, 2.7451820373535156, 8.89459228515625, 19.690216064453125, 4.82110595703125, 62.60223388671875, 5.746726989746094, 5.104911804199219, 22.47869873046875, 1.9754104614257812, 5.85028076171875, 10.247161865234375, 11.322128295898438, -3.351318359375, 7.18194580078125, 7.3701629638671875, 36.9044189453125, 16.129348754882812, 9.71234130859375, 10.778656005859375, 13.49859619140625, -5.4395751953125, -9.845672607421875, 24.33740234375, 3.1955814361572266, -8.043258666992188, 0.774139404296875, 1.1066246032714844, 40.404754638671875, 1.9118919372558594, 13.44375228881836, -7.5741119384765625, -2.2525558471679688, 12.66162109375, 36.67156982421875, -4.862274169921875, 10.86273193359375, 8.673049926757812, -10.613693237304688, 11.288742065429688, -6.667694091796875, 21.16619873046875, 38.00274658203125, -3.75946044921875, 8.253448486328125, 17.9342041015625, 5.274492263793945, 8.520221710205078, -1.2733535766601562, 1.8228683471679688, 5.596626281738281, 1.0269889831542969, 34.11163330078125, 8.1065673828125, 21.442489624023438, -5.940116882324219, -16.020263671875, 0.5261383056640625, 14.188583374023438, 8.868728637695312, 7.087249755859375, 24.51434326171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000078.npy"}
{"epoch": 0.16335078534031414, "step": 79, "batch_size": 128, "mean": 8.194294929504395, "std": 15.432801246643066, "min": -47.039306640625, "p10": -8.230525398254395, "median": 5.9877166748046875, "p90": 30.14607543945312, "max": 57.451812744140625, "pos_frac": 0.7109375, "sample": [-3.4492721557617188, 6.8868560791015625, 57.451812744140625, -12.45904541015625, 17.817138671875, 9.816986083984375, 12.002006530761719, 1.4702529907226562, -5.0643310546875, -5.6291961669921875, -17.368682861328125, 2.4150390625, 0.1562957763671875, 4.23626708984375, 10.9892578125, -8.65655517578125, 29.3544921875, -5.881561279296875, 20.633941650390625, 6.008064270019531, 2.291269302368164, 36.274566650390625, -3.52288818359375, 33.15234375, -3.7348175048828125, 36.4439697265625, 31.93841552734375, 19.320465087890625, -2.74920654296875, -0.08647918701171875, 11.410970687866211, 16.587059020996094, 7.67999267578125, -3.84783935546875, 2.2860870361328125, 0.03432464599609375, 2.5423583984375, 10.785385131835938, 6.785186767578125, -1.144134521484375, 13.405120849609375, -22.10693359375, -1.2119979858398438, 0.6587390899658203, -47.039306640625, -13.33599853515625, 12.649795532226562, 22.712738037109375, 8.0228271484375, 17.986083984375, -10.02886962890625, 11.847602844238281, 24.92449951171875, 29.8704833984375, 9.617340087890625, 9.660476684570312, 6.721221923828125, 14.73046875, 8.13604736328125, 1.2366828918457031, 16.552032470703125, 24.7164306640625, -7.4180755615234375, 14.10992431640625, 2.9321136474609375, 2.20782470703125, 35.987548828125, -1.33905029296875, 18.725799560546875, -19.94153594970703, 16.358917236328125, -2.0370941162109375, -10.574462890625, 5.984649658203125, 9.019912719726562, 0.0, 5.99078369140625, 23.971343994140625, 21.11065673828125, 49.442962646484375, 5.525730133056641, 4.41961669921875, -0.28167724609375, -8.3681640625, 0.713836669921875, 4.753728866577148, 15.501556396484375, 7.263275146484375, -0.9190101623535156, 3.12310791015625, 1.2006416320800781, 10.691509246826172, -1.4715557098388672, 18.1085205078125, -13.0966796875, 36.00177001953125, 0.0, 3.098876953125, -8.171537399291992, -6.333765029907227, 1.2768173217773438, 21.229446411132812, 6.183748245239258, 35.09002685546875, 24.973190307617188, 7.709341049194336, -20.6356201171875, 14.2803955078125, -3.3517494201660156, 4.40289306640625, 14.89581298828125, 27.75787353515625, 26.70721435546875, 4.97802734375, 28.32147216796875, 3.9091644287109375, 30.78912353515625, 1.7542572021484375, -4.625823974609375, 34.632598876953125, 5.2796478271484375, 7.2416534423828125, 5.671741485595703, 37.68798828125, 8.782341003417969, -8.5421142578125, -7.30078125, 40.576812744140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000079.npy"}
{"epoch": 0.16544502617801046, "step": 80, "batch_size": 128, "mean": 6.554655075073242, "std": 14.130816459655762, "min": -27.59564208984375, "p10": -8.791023254394531, "median": 4.5688629150390625, "p90": 25.278100585937498, "max": 59.75506591796875, "pos_frac": 0.6484375, "sample": [4.876567840576172, 14.019073486328125, -3.426239013671875, 13.304832458496094, 3.1190185546875, -3.2472476959228516, -0.6951904296875, 3.6724624633789062, 0.3652801513671875, 24.586639404296875, 12.921981811523438, 8.56829833984375, 1.7552490234375, -6.113533020019531, -3.7250595092773438, -2.692169189453125, -1.67230224609375, 4.567657470703125, 17.842239379882812, 10.337516784667969, -2.3776397705078125, 5.139533996582031, -18.188430786132812, -1.4688262939453125, 15.606903076171875, 7.7720947265625, -10.613189697265625, 32.579315185546875, 0.5953292846679688, -7.748294830322266, 4.570068359375, -8.899917602539062, 5.37896728515625, -5.3137359619140625, -2.7651519775390625, -27.1544189453125, -27.59564208984375, 25.2010498046875, 0.0, 27.712005615234375, 35.705352783203125, -3.4606399536132812, 6.23944091796875, -0.2290191650390625, 9.292015075683594, 19.819000244140625, 13.591278076171875, -1.509176254272461, 4.223838806152344, 8.640157699584961, -8.209197998046875, 8.034881591796875, -15.60906982421875, 36.85894775390625, -15.339836120605469, 24.498779296875, 18.96405029296875, 22.29095458984375, -13.35308837890625, 16.9239501953125, 9.888269424438477, -1.161041259765625, 6.539070129394531, -8.744354248046875, 24.01629638671875, -4.3391876220703125, 1.2771415710449219, 0.5575714111328125, -3.7730865478515625, 20.36688232421875, -0.5611419677734375, 2.99609375, 7.7738189697265625, 19.01123046875, 12.767822265625, -7.438201904296875, -3.0406532287597656, -0.16170501708984375, -2.438568115234375, 30.863525390625, 9.880485534667969, -9.884140014648438, 59.75506591796875, 1.7913818359375, 23.51202392578125, 21.6380615234375, 3.2015838623046875, -7.1998291015625, -1.995330810546875, 27.1065673828125, 25.076995849609375, 9.035354614257812, 23.48980712890625, 36.196075439453125, 13.1422119140625, 12.597213745117188, 0.602142333984375, 9.52801513671875, 3.6906051635742188, -6.558616638183594, 8.600006103515625, -18.795166015625, -3.62158203125, 12.051315307617188, 10.121417999267578, 28.15350341796875, 5.78265380859375, 2.3787994384765625, 4.5312652587890625, 7.636772155761719, 6.385065078735352, -9.647796630859375, -2.9698638916015625, 1.8239059448242188, 16.014938354492188, -18.12762451171875, -3.5211181640625, 28.548370361328125, -9.35205078125, 10.09027099609375, 26.874099731445312, 7.72900390625, 6.040525436401367, 25.4578857421875, 42.560546875, 1.3139495849609375, 2.462615966796875, 13.30096435546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000080.npy"}
{"epoch": 0.16753926701570682, "step": 81, "batch_size": 128, "mean": 7.12375545501709, "std": 17.16427230834961, "min": -44.215789794921875, "p10": -12.505569458007812, "median": 6.116158485412598, "p90": 31.118789672851555, "max": 55.21636962890625, "pos_frac": 0.6953125, "sample": [0.9472503662109375, -1.2003917694091797, 20.578414916992188, -20.43035888671875, 19.953285217285156, 2.417633056640625, -12.487396240234375, 5.85821533203125, 12.268798828125, 28.079315185546875, 10.74591064453125, 11.066619873046875, 17.9937744140625, 55.21636962890625, 7.00262451171875, 7.9712982177734375, -3.182180404663086, 10.437599182128906, -4.01556396484375, -0.5476303100585938, 6.2875213623046875, 1.698526382446289, 33.9881591796875, 35.705780029296875, -23.5491943359375, 10.46759033203125, 34.2137451171875, -10.86767578125, 2.5826663970947266, -1.8273162841796875, 3.3224029541015625, 24.016799926757812, 3.4565582275390625, 0.37969970703125, -15.79034423828125, 7.809471130371094, -14.9168701171875, 42.38134765625, 29.17034912109375, 10.068893432617188, 8.346000671386719, 37.98065185546875, 9.305503845214844, -6.239311218261719, -8.892845153808594, 23.469879150390625, -15.424232482910156, -5.435554504394531, 7.331733703613281, 4.7989349365234375, -24.600494384765625, 9.089099884033203, 34.929779052734375, 20.93389892578125, 12.404243469238281, -4.3988037109375, 8.9461669921875, 9.462753295898438, 5.014518737792969, -12.5479736328125, 26.640594482421875, -2.553974151611328, 33.472503662109375, 7.309356689453125, 42.57904052734375, 30.347137451171875, 19.392868041992188, 21.078857421875, -9.82122802734375, -11.98602294921875, 23.980224609375, -1.7315597534179688, 6.9743194580078125, 1.0481681823730469, -2.4612884521484375, 17.292404174804688, 8.7762451171875, 27.307540893554688, 4.1550445556640625, -35.475433349609375, -19.907516479492188, 32.9193115234375, 30.25054931640625, 23.86761474609375, 26.104949951171875, 6.77507209777832, -1.3369140625, 1.97735595703125, 0.5507659912109375, 5.944795608520508, -17.48907470703125, -8.411407470703125, 7.07135009765625, 37.1556396484375, 38.60198974609375, 21.855690002441406, 14.387290954589844, -6.610084533691406, 16.70220947265625, 1.0282707214355469, -26.841690063476562, 0.0599517822265625, -44.215789794921875, -11.05963134765625, 23.414276123046875, 33.728302001953125, 27.73388671875, -7.5399169921875, -9.09625244140625, 0.23590850830078125, 2.479827880859375, 6.60284423828125, -2.337890625, 14.089614868164062, 4.070764541625977, 14.37164306640625, 7.382118225097656, 17.50299072265625, -11.374916076660156, 16.457733154296875, -25.388275146484375, -5.804443359375, 3.1906051635742188, 2.3414993286132812, 5.4287109375, 1.5406570434570312, 2.6363754272460938, -7.2764739990234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000081.npy"}
{"epoch": 0.16963350785340314, "step": 82, "batch_size": 128, "mean": 7.860725402832031, "std": 16.829322814941406, "min": -34.808013916015625, "p10": -10.561247253417969, "median": 7.0228729248046875, "p90": 24.67516174316406, "max": 58.800506591796875, "pos_frac": 0.71875, "sample": [4.7607421875, 23.900115966796875, -0.7570037841796875, -10.572708129882812, 22.444656372070312, 13.824951171875, 27.62762451171875, 16.10626220703125, 27.088836669921875, 12.160263061523438, 16.9608154296875, 17.076995849609375, 44.73492431640625, -9.07293701171875, -3.797698974609375, 58.800506591796875, -14.22039794921875, 4.9802398681640625, -7.961334228515625, -6.038841247558594, -10.55633544921875, 20.075592041015625, 19.83660888671875, 22.17718505859375, 5.806877136230469, 55.83740234375, -8.555633544921875, 24.3953857421875, 18.118309020996094, 18.22863006591797, -15.768218994140625, 11.015533447265625, 4.909889221191406, 11.87164306640625, 10.046142578125, 57.2716064453125, 0.3795013427734375, 7.5675811767578125, 14.592987060546875, -34.808013916015625, 3.8516693115234375, -14.166351318359375, 15.119522094726562, -4.554267883300781, 14.902305603027344, 9.943611145019531, 10.04632568359375, 0.3588905334472656, 26.211639404296875, 13.256202697753906, -1.511037826538086, 22.301620483398438, 12.339752197265625, 0.6568813323974609, 5.355781555175781, 12.436172485351562, 7.0518951416015625, 12.581287384033203, 19.593017578125, 14.404342651367188, 40.860107421875, 0.122344970703125, 18.279815673828125, 15.467269897460938, 15.67041015625, -5.2604522705078125, -4.680023193359375, 9.40771484375, -16.941726684570312, -2.2227630615234375, 6.6351318359375, 11.14593505859375, 2.698650360107422, 26.29376220703125, -26.926361083984375, 10.237945556640625, 7.625732421875, -5.012481689453125, 3.9498138427734375, 2.6728363037109375, 8.91033935546875, 7.657073974609375, -10.2694091796875, 9.418289184570312, -2.42535400390625, 6.709482192993164, 2.6642932891845703, 0.342315673828125, 6.9938507080078125, 10.840179443359375, 6.403221130371094, 2.5264129638671875, -7.9166259765625, 24.342498779296875, 25.327972412109375, 7.4807281494140625, -7.4608154296875, -21.331130981445312, -1.491668701171875, 21.47064208984375, -13.959938049316406, 51.893341064453125, -16.209136962890625, 45.6995849609375, -3.583709716796875, 3.35650634765625, 54.27545166015625, -4.8477630615234375, 21.504150390625, -18.197265625, -31.16546630859375, 3.954986572265625, 6.546722412109375, 5.049324035644531, 8.975418090820312, 14.779190063476562, -6.3875732421875, 6.1774749755859375, -4.724273681640625, 16.115875244140625, 9.719871520996094, 23.008056640625, 4.1303253173828125, 7.383392333984375, 2.076223373413086, 2.9637298583984375, -10.3880615234375, -22.857498168945312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000082.npy"}
{"epoch": 0.17172774869109947, "step": 83, "batch_size": 128, "mean": 8.663426399230957, "std": 19.148483276367188, "min": -58.023529052734375, "p10": -10.35099868774414, "median": 7.206718444824219, "p90": 35.48095703124999, "max": 52.114013671875, "pos_frac": 0.7265625, "sample": [11.89276123046875, 0.0, 34.13421630859375, -0.307525634765625, 22.5626220703125, 4.959098815917969, -11.3680419921875, 32.30029296875, 18.58453369140625, 49.07513427734375, 10.447944641113281, 11.530351638793945, 13.303558349609375, -3.5181808471679688, 13.356491088867188, 19.25616455078125, 4.03594970703125, 5.17047119140625, -6.19305419921875, 6.0621490478515625, 37.33966064453125, 9.91357421875, 12.200210571289062, 5.959259033203125, -9.915122985839844, 32.227203369140625, 14.495834350585938, 0.88568115234375, 20.788604736328125, 36.51898193359375, 2.09918212890625, 4.097869873046875, 13.931411743164062, 1.6567306518554688, -9.792068481445312, 27.6925048828125, 0.7474899291992188, 4.938934326171875, 43.07737731933594, 7.94580078125, 19.20806884765625, 20.121841430664062, 2.295684814453125, 19.803253173828125, 46.334442138671875, 15.737060546875, -49.920745849609375, 0.380523681640625, -23.583633422851562, 13.787368774414062, -19.344970703125, 14.150970458984375, 30.444732666015625, 1.164306640625, 18.55255126953125, 27.726959228515625, -45.03759765625, -17.816925048828125, 16.14224624633789, 43.0072021484375, 8.974761962890625, 36.49609375, 41.583282470703125, -3.7447052001953125, 13.129241943359375, -5.9076690673828125, -6.61041259765625, -58.023529052734375, 10.174118041992188, -2.3016738891601562, 5.286836624145508, 2.9543914794921875, 26.339111328125, -12.6685791015625, 5.88287353515625, -0.3252449035644531, 18.182464599609375, 13.8170166015625, 36.91595458984375, -5.863494873046875, 9.532135009765625, -0.80731201171875, -14.44073486328125, 9.066375732421875, 2.2829742431640625, -3.7488861083984375, 10.21441650390625, 16.298606872558594, -28.871734619140625, 24.661376953125, -0.1866168975830078, -6.140251159667969, 2.8207473754882812, 0.332977294921875, 14.565887451171875, -1.707366943359375, 5.953788757324219, -41.163360595703125, 0.892242431640625, 3.042388916015625, -3.9094696044921875, 35.0458984375, 3.4940948486328125, 2.74969482421875, -8.11273193359375, 42.593841552734375, 38.34747314453125, -13.868988037109375, -3.4115982055664062, 20.741432189941406, 14.0142822265625, 28.872222900390625, 52.114013671875, -21.122802734375, 12.86309814453125, 26.357650756835938, 31.466995239257812, -6.8431396484375, 17.0306396484375, -4.812286376953125, 3.453704833984375, 2.79071044921875, 9.33469009399414, 43.391357421875, 6.4676361083984375, 13.475852966308594, 0.89404296875, 13.364356994628906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000083.npy"}
{"epoch": 0.17382198952879582, "step": 84, "batch_size": 128, "mean": 7.32763671875, "std": 17.638031005859375, "min": -51.006011962890625, "p10": -14.711575317382813, "median": 7.243755340576172, "p90": 29.494784545898437, "max": 58.043121337890625, "pos_frac": 0.6875, "sample": [1.6095867156982422, 15.420562744140625, 45.46978759765625, -3.2173233032226562, -51.006011962890625, 30.19610595703125, 12.031097412109375, 3.6492996215820312, 1.0559768676757812, -5.0318756103515625, 5.359333038330078, 22.956085205078125, 21.16229248046875, 14.23260498046875, 58.043121337890625, -28.1776123046875, 30.389312744140625, -1.536773681640625, 8.138916015625, -2.7053298950195312, 17.333099365234375, 3.420473098754883, 14.16839599609375, 10.756637573242188, -0.23090362548828125, 3.357574462890625, -7.645881652832031, 4.168462753295898, 3.547727584838867, 42.917449951171875, 43.41168212890625, 29.40673828125, -2.8863525390625, -18.177490234375, 12.18316650390625, 15.0428466796875, 15.3236083984375, -2.0745468139648438, 29.700225830078125, 3.1676063537597656, -5.008613586425781, 18.82391357421875, 9.611907958984375, 14.16693115234375, 33.943695068359375, 14.814376831054688, 11.623992919921875, -18.683212280273438, 2.72967529296875, 6.93023681640625, -14.692459106445312, 2.4169921875, 7.3818359375, -2.7217769622802734, 5.68682861328125, -6.216583251953125, -36.286285400390625, -1.15106201171875, -7.83026123046875, 15.436553955078125, -16.2784423828125, -14.756179809570312, 27.493072509765625, 14.12786865234375, 10.186958312988281, -10.156524658203125, 17.545455932617188, 18.0228271484375, -1.671844482421875, -1.54931640625, 15.98846435546875, -7.108734130859375, 21.31353759765625, 13.0562744140625, -1.2278251647949219, 14.75018310546875, 14.83819580078125, -9.354034423828125, 13.543121337890625, 5.9146728515625, 4.1368408203125, 2.0002593994140625, 0.6526107788085938, 28.809844970703125, 18.025840759277344, 15.200180053710938, -10.842559814453125, 34.83416748046875, 46.749053955078125, 3.496124267578125, 27.626983642578125, 5.814506530761719, 3.5000686645507812, 16.80169677734375, 50.499176025390625, -11.965782165527344, 6.007652282714844, -0.9803543090820312, 8.864990234375, 10.91512680053711, 24.24664306640625, -25.106277465820312, 10.177505493164062, -0.5150909423828125, 17.424072265625, 12.37091064453125, -2.0819091796875, -23.86773681640625, 11.552825927734375, -8.519668579101562, 3.957977294921875, 8.203399658203125, 14.518993377685547, 37.420379638671875, -28.854095458984375, 12.745849609375, -31.07989501953125, 25.848812103271484, 32.70367431640625, 4.9412078857421875, -21.033447265625, 10.744537353515625, -7.745658874511719, 7.105674743652344, 13.700050354003906, 15.142745971679688, 13.76959228515625, -20.56414794921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000084.npy"}
{"epoch": 0.17591623036649215, "step": 85, "batch_size": 128, "mean": 5.667843818664551, "std": 16.384721755981445, "min": -34.03558349609375, "p10": -15.416452026367187, "median": 5.768136978149414, "p90": 27.353724670410152, "max": 54.892425537109375, "pos_frac": 0.640625, "sample": [4.6712646484375, -7.53973388671875, -0.6972618103027344, 33.18995666503906, -6.804290771484375, 10.4888916015625, 18.887054443359375, 31.243499755859375, 24.595733642578125, 22.11520767211914, 27.011459350585938, 9.987876892089844, 3.8862152099609375, 33.16339111328125, 0.662872314453125, -18.9481201171875, 23.830772399902344, 17.65301513671875, 37.044219970703125, 18.32513427734375, -11.932731628417969, 6.326202392578125, 10.01324462890625, 5.459846496582031, 6.24267578125, 8.88739013671875, 4.73760986328125, 17.172958374023438, -1.957611083984375, 18.207183837890625, -17.102066040039062, 1.43817138671875, 6.604999542236328, 22.237998962402344, -5.685516357421875, -9.347412109375, -11.913093566894531, 23.689697265625, -0.2733154296875, -3.578460693359375, 4.559783935546875, 1.492095947265625, 38.97077941894531, 33.384063720703125, 7.73198127746582, 10.842041015625, 6.152753829956055, -4.1515350341796875, -10.45062255859375, -21.557891845703125, -7.0101318359375, 17.551349639892578, -9.034725189208984, 14.7508544921875, 1.2681522369384766, -3.097015380859375, 36.291351318359375, -30.414825439453125, -15.56793212890625, 20.410919189453125, 17.227943420410156, 8.400711059570312, 54.892425537109375, -11.119873046875, 34.165771484375, 18.865814208984375, 4.269060134887695, -1.7907371520996094, 30.159698486328125, 18.3477783203125, -15.351531982421875, 18.32276153564453, -2.9532203674316406, -13.35333251953125, 14.271163940429688, -7.1642608642578125, 15.286178588867188, 3.5165061950683594, 11.011665344238281, 10.972053527832031, -28.482452392578125, -8.092742919921875, -10.878211975097656, -5.7083740234375, -13.390975952148438, -20.268951416015625, -28.658615112304688, 4.227874755859375, 11.593513488769531, -4.2216796875, -18.492080688476562, 9.47262954711914, -3.69793701171875, 10.859298706054688, -6.657783508300781, 5.1898193359375, 7.20208740234375, 21.096038818359375, 19.48968505859375, -5.19927978515625, 24.048919677734375, -34.03558349609375, 6.076427459716797, 6.740150451660156, -11.13128662109375, 35.227325439453125, 13.13153076171875, 1.610992431640625, -16.529541015625, 10.967796325683594, 10.650222778320312, -4.6114044189453125, 25.114044189453125, 11.923446655273438, 2.4190139770507812, 16.564285278320312, 31.900054931640625, 3.8224105834960938, -15.611358642578125, -26.5819091796875, 2.657562255859375, 3.318704605102539, 28.15234375, -2.691730499267578, 6.08154296875, -10.949623107910156, 10.165908813476562, 9.57889175415039], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000085.npy"}
{"epoch": 0.17801047120418848, "step": 86, "batch_size": 128, "mean": 8.767333030700684, "std": 17.814699172973633, "min": -45.3662109375, "p10": -9.435186767578124, "median": 9.752548217773438, "p90": 29.897811889648438, "max": 59.691009521484375, "pos_frac": 0.6640625, "sample": [16.958877563476562, -45.3662109375, 16.48199462890625, -18.8238525390625, -4.5360260009765625, 29.5552978515625, 47.3287353515625, 24.93255615234375, -16.41290283203125, -5.930511474609375, -1.9012451171875, -2.598888397216797, 30.16131591796875, 4.982841491699219, -24.29962158203125, 8.56463623046875, 20.093414306640625, 6.623046875, 0.0, -2.104705810546875, 15.314453125, 18.76751708984375, 14.009521484375, 12.320281982421875, 1.8021240234375, 13.491851806640625, 22.94873046875, -1.3600006103515625, 30.354766845703125, 3.290985107421875, 26.778594970703125, -7.689262390136719, 20.198486328125, -3.9276123046875, 21.29412841796875, 7.423931121826172, 20.51995849609375, -41.181488037109375, 20.268798828125, 36.349609375, 5.0279388427734375, 11.135385513305664, -0.9646682739257812, -1.68048095703125, 16.4976806640625, 11.031692504882812, 17.292572021484375, -5.7475738525390625, -0.23195648193359375, 16.201324462890625, 32.81085205078125, 24.22869873046875, -5.762115478515625, -10.309844970703125, 1.1862106323242188, 28.062225341796875, 5.600074768066406, -19.332275390625, -39.61932373046875, 12.577239990234375, -2.452606201171875, 22.508560180664062, -0.40602684020996094, 6.235870361328125, 21.2261962890625, 9.657562255859375, 59.691009521484375, -2.16619873046875, -1.6574554443359375, -12.26318359375, 17.991111755371094, -7.065277099609375, 39.746368408203125, -2.837188720703125, -29.7967529296875, 8.37841796875, 29.784881591796875, -24.2364501953125, 12.057777404785156, 15.8880615234375, -26.659088134765625, 24.25360870361328, 2.580446243286133, -2.0294952392578125, 36.9918212890625, 15.69512939453125, -2.8749618530273438, 49.01007080078125, -4.82305908203125, 16.509963989257812, 1.9713401794433594, 12.275604248046875, 3.134918212890625, 15.7894287109375, 12.875732421875, 13.92431640625, 6.533849716186523, 9.8475341796875, 14.66036605834961, -5.595415115356445, 29.4075927734375, 0.948516845703125, 23.554771423339844, 26.453369140625, 37.214111328125, 18.518386840820312, -8.898468017578125, 22.14703369140625, 30.541229248046875, 30.711410522460938, 6.18939208984375, 23.148773193359375, -4.853752136230469, -9.060333251953125, 41.01611328125, 27.6346435546875, -0.545440673828125, 16.166259765625, -0.5448532104492188, 11.47882080078125, 4.654083251953125, 2.1646499633789062, 13.74884033203125, -21.78656005859375, -0.6639404296875, 8.90716552734375, 16.66790771484375, 10.254348754882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000086.npy"}
{"epoch": 0.18010471204188483, "step": 87, "batch_size": 128, "mean": 10.337986946105957, "std": 19.74123764038086, "min": -50.706573486328125, "p10": -8.569298553466798, "median": 5.406196594238281, "p90": 36.89910278320311, "max": 68.9931640625, "pos_frac": 0.6796875, "sample": [11.660751342773438, -0.76470947265625, -4.314971923828125, 25.78704833984375, 17.9744873046875, 35.51513671875, 22.733749389648438, 13.798858642578125, 0.17138099670410156, 8.36529541015625, 42.143707275390625, 8.094482421875, 48.978729248046875, -0.6578216552734375, 13.885147094726562, -0.27327728271484375, 14.735733032226562, 49.701416015625, 13.391403198242188, 0.0, 19.03204345703125, 7.5318603515625, -20.56964111328125, 25.72833251953125, -2.103485107421875, -7.336456298828125, -8.091995239257812, -12.616012573242188, 2.1302490234375, -7.0629730224609375, -7.8096923828125, 1.3349456787109375, 25.645751953125, 0.0, 9.787446975708008, -7.8060455322265625, 19.84827423095703, 2.4820556640625, 22.724395751953125, 10.11566162109375, 3.83770751953125, -19.468154907226562, -1.257781982421875, 22.9842529296875, 34.5599365234375, 29.11322021484375, 60.125091552734375, 58.111724853515625, 3.63983154296875, 16.261077880859375, 3.1854095458984375, -0.565673828125, 2.239349365234375, 26.545318603515625, -9.217689514160156, -7.37322998046875, -8.632514953613281, 26.5806884765625, 3.099029541015625, 14.134517669677734, 41.94561767578125, 4.587379455566406, 0.29443359375, -0.17919158935546875, 18.882022857666016, 9.706787109375, 1.67999267578125, 24.401412963867188, -2.1891632080078125, 2.905120849609375, 26.0648193359375, -2.476165771484375, 2.1610107421875, -10.036285400390625, -6.2889251708984375, 16.073699951171875, 17.916534423828125, 20.676300048828125, 68.9931640625, 40.12835693359375, 43.3209228515625, 23.447113037109375, 25.275421142578125, 4.7718048095703125, 26.072998046875, -50.706573486328125, -0.737060546875, -45.313079833984375, 10.849822998046875, 25.79205322265625, 9.287307739257812, 2.5043106079101562, 20.09844970703125, -3.307708740234375, -12.282440185546875, 2.5001163482666016, -19.6309814453125, 19.500564575195312, 54.33074951171875, -2.67791748046875, -0.7842941284179688, -3.152069091796875, 51.88664245605469, -18.563568115234375, 46.549652099609375, 6.3468475341796875, 28.7086181640625, -26.973297119140625, -15.4815673828125, -1.2726593017578125, 3.3607730865478516, 20.028968811035156, 9.100234985351562, 2.1576309204101562, 26.0064697265625, -1.4658584594726562, 2.6007556915283203, 1.4634933471679688, 31.600433349609375, 42.359832763671875, 9.54330825805664, -8.542205810546875, 4.5359954833984375, 6.04058837890625, 12.582778930664062, 0.1957244873046875, 35.1456298828125, -0.8527450561523438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000087.npy"}
{"epoch": 0.18219895287958116, "step": 88, "batch_size": 128, "mean": 8.80776596069336, "std": 19.576295852661133, "min": -66.28384399414062, "p10": -11.336094665527343, "median": 4.51715087890625, "p90": 37.82471771240234, "max": 65.01388549804688, "pos_frac": 0.6796875, "sample": [2.487091064453125, 31.0531005859375, 33.60772705078125, -3.4132232666015625, -7.0751953125, 22.06170654296875, -4.1971893310546875, 4.012371063232422, -1.1445236206054688, 6.49163818359375, 4.041618347167969, -13.59234619140625, 4.553028106689453, 6.1144561767578125, 0.0, 12.4688720703125, 25.003498077392578, 0.19183349609375, 1.1178359985351562, 23.591217041015625, -1.16241455078125, -8.9161376953125, 5.205078125, 40.5191650390625, 18.907424926757812, 27.354888916015625, 15.920257568359375, -1.9548358917236328, 6.185943603515625, 5.464569091796875, 53.025146484375, 7.942848205566406, 42.3878173828125, 2.9475784301757812, -25.321624755859375, -0.6329727172851562, 38.342041015625, 0.0, 0.670867919921875, 9.49798583984375, 51.75935363769531, -12.145950317382812, 38.06353759765625, -7.2379913330078125, 10.46356201171875, 21.5980224609375, 27.504119873046875, -10.867279052734375, 2.8202285766601562, 26.87255859375, 12.980743408203125, -12.315155029296875, 4.160491943359375, 7.495758056640625, -1.149953842163086, 23.48797607421875, 5.6568603515625, 1.2091064453125, -3.45880126953125, 3.8556365966796875, -7.849571228027344, 4.481273651123047, 0.3289642333984375, -9.20751953125, 28.298492431640625, 11.374946594238281, 9.549930572509766, -10.998931884765625, 10.237335205078125, 3.6450347900390625, 43.99806213378906, -7.280059814453125, -3.3671875, 18.678726196289062, -11.028518676757812, -18.69745635986328, 1.4037513732910156, -0.9841804504394531, 10.982330322265625, 19.994140625, 14.05963134765625, 0.8697071075439453, 5.1887969970703125, -33.213836669921875, -9.072357177734375, 41.778778076171875, 29.39190673828125, 2.2427005767822266, 36.172889709472656, 21.232460021972656, -4.024009704589844, 10.396240234375, -66.28384399414062, 1.802215576171875, -2.66400146484375, 19.91155242919922, 4.23724365234375, 65.01388549804688, -9.339946746826172, 8.471038818359375, 54.651885986328125, 41.762481689453125, 12.59423828125, -14.4234619140625, 51.33880615234375, -19.284011840820312, -17.441162109375, 37.72236633300781, 29.974197387695312, 1.8901615142822266, -3.6090011596679688, -23.13074493408203, 16.009918212890625, 3.6702346801757812, -2.750732421875, -12.05377197265625, 15.22552490234375, -13.595458984375, 25.895797729492188, 1.7040863037109375, 0.8838043212890625, 15.383270263671875, 21.483779907226562, 46.89599609375, 11.17669677734375, 13.12860107421875, 34.19642639160156, -6.146400451660156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000088.npy"}
{"epoch": 0.18429319371727748, "step": 89, "batch_size": 128, "mean": 9.297195434570312, "std": 19.33790397644043, "min": -50.23272705078125, "p10": -12.64241943359375, "median": 7.197380065917969, "p90": 36.10527038574217, "max": 62.331207275390625, "pos_frac": 0.7421875, "sample": [1.1268997192382812, -28.359375, 26.52349853515625, 9.880401611328125, 0.73211669921875, 33.4239501953125, 33.807159423828125, 62.331207275390625, 4.560142517089844, -23.113479614257812, 13.499420166015625, 8.36376953125, -2.00140380859375, -4.2061309814453125, 0.5023117065429688, 19.36773681640625, 12.6834716796875, 12.430488586425781, -14.083969116210938, -8.129180908203125, 38.8023681640625, 1.9496383666992188, 6.5813751220703125, 7.0288543701171875, 5.82135009765625, 14.283660888671875, 17.48328399658203, 40.517303466796875, 26.50823974609375, 40.54132080078125, -12.61517333984375, 13.323474884033203, 5.86541748046875, 41.170257568359375, 0.49292945861816406, 15.037849426269531, 50.737548828125, -32.212188720703125, -2.3706512451171875, 10.508209228515625, 15.040863037109375, 23.55706787109375, 16.600051879882812, 2.2144412994384766, 7.46881103515625, -3.1756744384765625, 22.097442626953125, 25.550872802734375, -0.03441619873046875, -3.7354278564453125, 4.839424133300781, 4.700653076171875, 26.05035400390625, 3.3131656646728516, -2.993194580078125, -9.043487548828125, 13.4827880859375, 50.283447265625, -0.3732147216796875, 46.083892822265625, 9.350738525390625, -12.70599365234375, 40.95355224609375, -42.879364013671875, 5.67816162109375, 19.046829223632812, 1.95751953125, 9.66949462890625, -50.23272705078125, -14.55218505859375, 1.956298828125, -37.5067138671875, 22.261032104492188, -1.3738479614257812, 13.487945556640625, 2.751800537109375, 31.422576904296875, -3.9254016876220703, 20.410133361816406, 30.728424072265625, 2.4998245239257812, 27.161575317382812, -33.60682678222656, -1.3501434326171875, 4.057518005371094, 9.307586669921875, -16.143537521362305, -5.37603759765625, -5.7758026123046875, 41.18206787109375, 18.718528747558594, 5.83135986328125, -2.288330078125, 34.949371337890625, 5.8083953857421875, -0.1842803955078125, 6.031829833984375, 25.755645751953125, 6.495391845703125, 22.964828491210938, 7.36590576171875, 24.13525390625, 41.367401123046875, -25.106704711914062, 4.8480682373046875, 7.6511688232421875, 0.4582977294921875, 2.9881362915039062, 7.620140075683594, 25.004302978515625, 17.153717041015625, 15.213729858398438, 9.958137512207031, 29.55364990234375, 46.58245849609375, -22.5047607421875, 12.1185302734375, 9.433090209960938, 13.707931518554688, 6.512947082519531, 44.28782653808594, 1.5839309692382812, 3.6956558227539062, 19.23688507080078, 2.3781089782714844, 11.59027099609375, -5.5908660888671875, -4.46148681640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000089.npy"}
{"epoch": 0.18638743455497384, "step": 90, "batch_size": 128, "mean": 8.221597671508789, "std": 18.476818084716797, "min": -45.4818115234375, "p10": -10.717495727539061, "median": 7.368511199951172, "p90": 29.688420104980466, "max": 55.370513916015625, "pos_frac": 0.6328125, "sample": [14.236679077148438, 0.813690185546875, -3.3136367797851562, 52.680816650390625, 40.86187744140625, 10.621185302734375, 21.83123779296875, 26.72637939453125, 35.88932800292969, 39.270355224609375, 17.0863037109375, 11.259696960449219, 25.023117065429688, -0.7874755859375, 26.826950073242188, -7.1157989501953125, 23.81060791015625, -8.209426879882812, 16.322509765625, 8.061233520507812, -11.198638916015625, 7.1194000244140625, 15.341949462890625, 26.211624145507812, 19.062255859375, -45.4818115234375, 24.88116455078125, -0.996490478515625, -8.44598388671875, 31.83221435546875, -1.14959716796875, 2.408445358276367, -10.51129150390625, -2.03948974609375, -9.479827880859375, 7.1036224365234375, -8.738739013671875, -1.40277099609375, -5.48974609375, 12.562530517578125, 16.2503662109375, -5.48394775390625, 9.154006958007812, -7.4501800537109375, -3.450592041015625, -0.9294586181640625, 3.6978759765625, 13.180641174316406, -3.5665283203125, 27.749755859375, -3.476165771484375, 14.232925415039062, -5.06268310546875, 25.44232177734375, 20.86376953125, 11.02618408203125, -0.849273681640625, 4.169097900390625, 27.07686996459961, 2.9567413330078125, 30.302490234375, 4.82916259765625, 30.42742919921875, 1.408447265625, -5.943603515625, 25.148406982421875, 14.804298400878906, 13.518821716308594, -10.014152526855469, 5.729339599609375, -0.8677444458007812, -41.043975830078125, -38.55070495605469, -8.125991821289062, 17.329795837402344, 21.89984130859375, -17.591278076171875, -4.503662109375, 5.8875579833984375, 14.053308486938477, 5.5950927734375, 44.9580078125, 19.031524658203125, 12.8414306640625, -13.49188232421875, 55.370513916015625, 4.3803253173828125, 22.316085815429688, 10.176198959350586, -6.9019927978515625, 22.326187133789062, -12.4205322265625, -12.176673889160156, -9.208198547363281, 42.6680908203125, 22.50958251953125, -32.90547180175781, -4.04168701171875, -7.48046875, 14.06866455078125, 19.084197998046875, 54.5782470703125, 25.41851806640625, 12.806976318359375, 17.92767333984375, 1.628021240234375, -12.075897216796875, 11.289039611816406, 0.431671142578125, 0.0, 25.053253173828125, 33.43617248535156, -0.8992557525634766, 4.834938049316406, 7.617622375488281, 25.77337646484375, -16.373809814453125, -35.09234619140625, 13.093841552734375, -4.393157958984375, 24.291778564453125, 36.362396240234375, -8.860366821289062, 12.571136474609375, 1.9513187408447266, 17.578460693359375, -20.4232177734375, 29.425247192382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000090.npy"}
{"epoch": 0.18848167539267016, "step": 91, "batch_size": 128, "mean": 9.505620956420898, "std": 20.64821434020996, "min": -35.30015563964844, "p10": -13.833790588378905, "median": 6.979351043701172, "p90": 39.51678771972656, "max": 59.500396728515625, "pos_frac": 0.6171875, "sample": [6.276397705078125, -7.645332336425781, -4.299732208251953, 51.923065185546875, -5.2640838623046875, -30.56231689453125, -35.30015563964844, 25.703643798828125, 15.962265014648438, -25.853240966796875, 6.695213317871094, -1.2451438903808594, 0.0, -27.35565185546875, -1.9885635375976562, -11.056793212890625, -1.561239242553711, -10.266204833984375, 59.500396728515625, -4.316436767578125, -24.325523376464844, -2.5029220581054688, 5.738800048828125, 37.52178955078125, -29.189910888671875, -7.57891845703125, 45.04583740234375, 31.333480834960938, 39.924102783203125, -2.2207069396972656, 5.96295166015625, 15.167861938476562, 42.523406982421875, 54.56324768066406, 2.8227806091308594, 18.02155303955078, -8.832817077636719, 16.573028564453125, 27.673004150390625, 20.6658935546875, 16.373245239257812, 23.023178100585938, 7.9178466796875, 12.186691284179688, -26.483123779296875, 6.6949310302734375, 4.010711669921875, 8.900299072265625, -1.9293212890625, 32.316253662109375, 7.26348876953125, -11.27301025390625, 34.002593994140625, 7.78912353515625, 39.40191650390625, 4.091217041015625, 21.137359619140625, 0.0, 22.469215393066406, -3.87646484375, 4.8217620849609375, -15.416412353515625, 12.370025634765625, -3.3912582397460938, 14.51153564453125, 10.50494384765625, 15.230422973632812, 18.439376831054688, -19.91131591796875, 1.5239448547363281, -8.366241455078125, 7.3389892578125, -1.425790786743164, 11.603515625, 9.219442367553711, -5.2375335693359375, -4.65570068359375, -5.963371276855469, 15.451202392578125, 34.173309326171875, 56.39532470703125, 18.210926055908203, 44.013641357421875, 6.507879257202148, 13.12774658203125, -8.619979858398438, -10.057403564453125, 23.270118713378906, 18.4722900390625, 14.463958740234375, 2.5725555419921875, 34.026458740234375, 35.312164306640625, -5.86700439453125, 5.618133544921875, 5.409212112426758, 28.401260375976562, 18.630592346191406, 13.694202423095703, 24.753082275390625, 16.4757080078125, 59.381805419921875, -13.653610229492188, -14.25421142578125, -1.6206722259521484, -24.031646728515625, 24.353515625, -3.2938079833984375, 22.72430419921875, -31.6009521484375, 14.511428833007812, 6.593019485473633, 30.31208038330078, -0.78070068359375, 16.44902801513672, 47.16412353515625, -0.87506103515625, 46.38671875, -4.412750244140625, 56.055511474609375, -0.4339141845703125, -22.23193359375, -6.696319580078125, 23.39971923828125, 9.339752197265625, 7.391876220703125, 39.784820556640625, -1.123565673828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000091.npy"}
{"epoch": 0.1905759162303665, "step": 92, "batch_size": 128, "mean": 10.137374877929688, "std": 21.9947566986084, "min": -54.388671875, "p10": -18.527899169921874, "median": 8.242281913757324, "p90": 40.718466186523436, "max": 64.1732177734375, "pos_frac": 0.7109375, "sample": [-20.891448974609375, 4.19293212890625, -27.90777587890625, 14.33563232421875, 31.092529296875, 9.415634155273438, 2.9639129638671875, -15.151290893554688, 44.5985107421875, -9.49078369140625, 18.87994384765625, 0.8095169067382812, 9.51806640625, -6.237762451171875, -18.648040771484375, -18.476409912109375, 33.209869384765625, 11.428359985351562, 28.659156799316406, 0.0, -8.050201416015625, 1.259124755859375, 40.67462158203125, 7.86883544921875, -16.912200927734375, 47.25299072265625, 28.4293212890625, -52.07611083984375, 20.696762084960938, -24.5753173828125, 28.306854248046875, 30.748779296875, 15.758590698242188, 18.121490478515625, 21.934219360351562, 8.85107421875, 3.883575439453125, 12.56292724609375, 28.567276000976562, 13.63653564453125, -21.91503143310547, 24.42291259765625, 36.86512756347656, 30.401275634765625, -39.360321044921875, 7.347900390625, 43.89898681640625, -10.438613891601562, -19.18359375, 46.37457275390625, -20.79119873046875, 1.068756103515625, 11.76763916015625, 33.360870361328125, -33.5078125, 25.863922119140625, -8.836715698242188, 3.363525390625, 31.17108154296875, 41.074005126953125, 3.12213134765625, 4.063995361328125, -5.33819580078125, -1.371429443359375, 32.794097900390625, 3.0632476806640625, 0.7658538818359375, 7.663434982299805, -1.83709716796875, -9.606353759765625, 0.22896194458007812, -54.388671875, 5.0115966796875, 7.1763763427734375, 13.709213256835938, 24.9521484375, -24.71331787109375, -10.25600814819336, -26.591888427734375, 18.4249267578125, -1.126922607421875, -5.871711730957031, 10.207565307617188, 21.256797790527344, 20.589080810546875, -6.387054443359375, 64.1732177734375, 17.701202392578125, 47.85791015625, 21.660873413085938, 16.080116271972656, 21.95477294921875, -5.79510498046875, 8.023672103881836, 6.359199523925781, 6.944553375244141, 50.10052490234375, -1.322845458984375, 49.2071533203125, 33.55224609375, 17.96942138671875, 8.460891723632812, 13.8704833984375, 0.0833740234375, 11.827423095703125, -6.86932373046875, 2.29351806640625, 26.225814819335938, 7.899993896484375, 24.898300170898438, 11.962066650390625, 17.44439697265625, -2.4290771484375, 20.525665283203125, 33.599517822265625, -1.6812801361083984, 11.142951965332031, 0.633270263671875, 50.45709228515625, 6.585845947265625, -7.241729736328125, -7.8022003173828125, 2.393705368041992, 58.527679443359375, 50.6943359375, 40.820770263671875, 39.87940979003906, 1.1863861083984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000092.npy"}
{"epoch": 0.19267015706806281, "step": 93, "batch_size": 128, "mean": 9.570716857910156, "std": 20.272010803222656, "min": -40.71783447265625, "p10": -14.228829956054685, "median": 8.233543395996094, "p90": 34.86348419189453, "max": 75.76849365234375, "pos_frac": 0.6796875, "sample": [-0.8105430603027344, 25.85980224609375, 3.390716552734375, 59.67755126953125, 7.5012969970703125, -5.658935546875, 7.744728088378906, 26.317169189453125, 4.8326416015625, 34.73887634277344, -9.285926818847656, 12.55470085144043, 2.9820556640625, 8.333564758300781, 15.37215805053711, 20.69500732421875, 6.58148193359375, -27.7437744140625, 35.15423583984375, 27.1671142578125, 13.470611572265625, -1.7446975708007812, -6.6533203125, -6.240610122680664, 50.089111328125, 18.862060546875, 25.30279541015625, -21.9803466796875, 21.37591552734375, 6.185325622558594, -30.037689208984375, 75.76849365234375, -15.882186889648438, 20.962379455566406, -0.653656005859375, 11.091903686523438, 12.620857238769531, 45.81915283203125, 4.2631683349609375, 37.21514892578125, 17.025100708007812, 21.802871704101562, 8.133522033691406, 14.376823425292969, 28.731536865234375, -10.451629638671875, 19.8931884765625, 15.261871337890625, -19.558311462402344, 8.433807373046875, 13.176471710205078, 4.462762832641602, 17.914657592773438, 19.690170288085938, -10.35943603515625, 1.039154052734375, 11.685455322265625, 6.603424072265625, 0.0, -12.142242431640625, 48.48773193359375, 4.719738006591797, -16.015762329101562, -9.817245483398438, 10.27447509765625, 11.83051872253418, 10.494171142578125, -19.070022583007812, -12.868438720703125, 26.724853515625, 8.863014221191406, -4.77191162109375, 36.691436767578125, 12.999465942382812, -3.4334487915039062, 13.341339111328125, -2.3201904296875, -9.828731536865234, 40.29400634765625, 2.6104202270507812, 0.741973876953125, 6.5372314453125, 18.422691345214844, -20.9691162109375, 17.258941650390625, 0.0, 3.657470703125, -6.227104187011719, -0.7513446807861328, 20.766220092773438, 40.449310302734375, 30.40887451171875, 11.572479248046875, 20.007965087890625, 5.859130859375, 55.22174072265625, 15.778472900390625, 2.6336669921875, 29.644638061523438, 33.09307861328125, -6.8360443115234375, 6.585752487182617, 1.21759033203125, 62.926666259765625, 25.898635864257812, 20.027053833007812, 19.000885009765625, -28.35284423828125, 20.04095458984375, -25.186737060546875, 36.2501220703125, -2.7870635986328125, 33.162506103515625, -9.166900634765625, 32.9849853515625, -40.71783447265625, -21.915756225585938, -9.346290588378906, -2.3608856201171875, 1.4044914245605469, -12.01385498046875, -13.520248413085938, 22.993297576904297, -34.972900390625, -0.4311504364013672, 21.1143798828125, 4.7916107177734375, 19.9920654296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000093.npy"}
{"epoch": 0.19476439790575917, "step": 94, "batch_size": 128, "mean": 9.35784912109375, "std": 22.75712013244629, "min": -76.1517333984375, "p10": -21.815638732910156, "median": 8.019855499267578, "p90": 39.55076293945312, "max": 61.31536865234375, "pos_frac": 0.6953125, "sample": [-5.8999786376953125, 33.83793640136719, -21.493606567382812, 36.653594970703125, -35.3707275390625, 49.7369384765625, 19.55208969116211, 10.89990234375, 38.690185546875, 8.077033996582031, 7.2104339599609375, 17.15179443359375, 14.905364990234375, -5.837749481201172, -3.6923828125, 10.42965316772461, 3.560821533203125, 16.91546630859375, 19.894256591796875, 21.412918090820312, 26.76544189453125, 9.602005004882812, 50.852569580078125, -38.90730285644531, 6.5263671875, 28.241302490234375, -10.783706665039062, -4.008148193359375, 11.335540771484375, -28.00360107421875, -0.22943115234375, 2.08001708984375, 35.79954528808594, 47.49017333984375, 52.49664306640625, 5.417900085449219, 26.3853759765625, 7.25128173828125, -23.220046997070312, 56.5345458984375, 30.622161865234375, 15.545181274414062, 7.962677001953125, -6.156684875488281, 6.007722854614258, 18.92730712890625, 12.855926513671875, -7.2767486572265625, -0.7030181884765625, 29.309341430664062, 1.88775634765625, 2.925121307373047, -0.7327232360839844, -18.908111572265625, 20.531585693359375, 5.538017272949219, -26.454116821289062, 41.460235595703125, 41.11900329589844, -25.7093505859375, 21.63824462890625, 3.4574737548828125, -7.966461181640625, -18.142059326171875, 8.090789794921875, 20.579925537109375, -1.38934326171875, -16.91712188720703, 12.857696533203125, 44.404052734375, 17.77532958984375, -10.678398132324219, 54.07231140136719, 24.535797119140625, 2.644683837890625, -0.30633544921875, 31.79278564453125, -24.643646240234375, 12.34466552734375, -9.75848388671875, 2.90765380859375, 20.274658203125, 39.21368408203125, -30.39300537109375, 12.139907836914062, 23.794769287109375, 4.6738739013671875, 15.724433898925781, 26.29302978515625, -27.514404296875, -76.1517333984375, 32.15350341796875, 33.272796630859375, 9.8956298828125, 61.31536865234375, 31.05941390991211, -40.21952819824219, -1.884552001953125, 34.228271484375, -8.525299072265625, 42.26739501953125, 13.833831787109375, 21.155029296875, -30.9239501953125, -12.7901611328125, 3.437530517578125, 43.60760498046875, 40.3372802734375, -6.8397216796875, 3.533111572265625, 1.2033004760742188, -8.005363464355469, 9.0908203125, -4.7769775390625, 15.321380615234375, 2.262054443359375, 31.0751953125, 7.457252502441406, 2.218109130859375, 3.82586669921875, 4.828365325927734, 22.161378860473633, 17.067184448242188, 20.22589111328125, -22.567047119140625, 3.908843994140625, -0.9746074676513672, 2.2030029296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000094.npy"}
{"epoch": 0.1968586387434555, "step": 95, "batch_size": 128, "mean": 10.094449996948242, "std": 24.139585494995117, "min": -70.764404296875, "p10": -15.248402404785155, "median": 8.475990295410156, "p90": 41.36938781738281, "max": 76.58087158203125, "pos_frac": 0.703125, "sample": [-7.063398361206055, -3.2099609375, -1.6072311401367188, 67.57498168945312, 20.553285598754883, -35.96287536621094, 21.9322509765625, 30.551513671875, 9.349838256835938, 9.155853271484375, 20.172225952148438, 8.205101013183594, 43.313720703125, 8.185043334960938, 9.341453552246094, 31.808624267578125, 15.455657958984375, 71.35186767578125, -7.2007598876953125, 25.00506591796875, 26.86248779296875, 20.2777099609375, -29.9122314453125, 17.160003662109375, -5.5941162109375, 23.913074493408203, 16.255966186523438, 5.585285186767578, -4.1949615478515625, 22.50299072265625, 7.9150390625, 30.603973388671875, 14.44403076171875, -1.785003662109375, 14.491683959960938, 7.6434326171875, -3.709808349609375, 2.794647216796875, 33.58763122558594, 14.9083251953125, 27.577850341796875, 0.035125732421875, -33.82110595703125, 15.868988037109375, 23.626251220703125, 0.7999000549316406, 21.2310791015625, -11.065460205078125, 8.746879577636719, 7.151096343994141, -11.848899841308594, 26.45318603515625, -7.6387939453125, -2.683134078979492, -6.20550537109375, 3.2995567321777344, 21.95269775390625, 47.37939453125, 37.020469665527344, 7.0557708740234375, -2.7055511474609375, 19.902435302734375, 13.992218017578125, -35.76837158203125, 42.517120361328125, 19.09442138671875, 3.004791259765625, 0.522735595703125, 19.58979034423828, 19.54681396484375, -1.5147705078125, 9.244720458984375, 6.159149169921875, 9.657196044921875, 69.096435546875, -16.045135498046875, 12.660148620605469, 23.19312286376953, 30.818801879882812, 18.627273559570312, 20.056549072265625, 43.18408203125, -10.257232666015625, -4.7901153564453125, 3.5238494873046875, -25.914627075195312, 0.8921165466308594, -7.766056060791016, -8.582992553710938, -27.679656982421875, 15.039230346679688, 22.632064819335938, -14.65289306640625, 0.5931129455566406, 6.7582550048828125, 76.58087158203125, 4.18121337890625, 6.170921325683594, -31.280517578125, 18.09149169921875, 2.4581756591796875, 17.006622314453125, 14.259666442871094, 28.416961669921875, 47.86138916015625, -56.546600341796875, 52.04296875, -15.029815673828125, 21.46881103515625, -0.1480865478515625, -1.4456939697265625, -3.647369384765625, 2.6913394927978516, 40.87750244140625, 1.3802947998046875, 14.672012329101562, 70.37451171875, -24.889007568359375, -15.758438110351562, 3.80908203125, -7.6729888916015625, 20.079696655273438, -40.89569091796875, 5.10247802734375, 6.3929290771484375, 56.26776123046875, -70.764404296875, 49.754638671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000095.npy"}
{"epoch": 0.19895287958115182, "step": 96, "batch_size": 128, "mean": 10.172271728515625, "std": 24.256547927856445, "min": -72.6627197265625, "p10": -21.642298889160156, "median": 7.879249572753906, "p90": 39.30961608886719, "max": 80.17333984375, "pos_frac": 0.671875, "sample": [-10.208343505859375, -41.1446533203125, -3.4693603515625, 19.421550750732422, 38.945648193359375, 29.356685638427734, 26.195220947265625, 6.780601501464844, 23.05303955078125, -22.286285400390625, 20.117263793945312, 8.551712036132812, 22.68128204345703, -3.946380615234375, 3.0154876708984375, -1.68170166015625, 2.9532623291015625, 56.01324462890625, 56.87127685546875, 15.716064453125, 13.97467041015625, 24.491600036621094, 10.318778991699219, 31.300628662109375, 7.041259765625, -2.81097412109375, 3.1833267211914062, -12.445587158203125, -21.421615600585938, 6.576713562011719, 4.686975479125977, 22.454959869384766, 40.15887451171875, 25.79291534423828, -23.735116958618164, -3.44232177734375, -0.5706748962402344, 0.6371231079101562, -13.449874877929688, -1.1271743774414062, 2.10211181640625, 5.382537841796875, 42.10429382324219, 28.880447387695312, 25.83465576171875, -28.97857666015625, 5.515869140625, -37.83360290527344, 3.4293670654296875, 45.832733154296875, 35.334686279296875, 17.81316375732422, -44.648406982421875, 32.0186767578125, 9.730682373046875, 1.8954734802246094, -16.757247924804688, -22.1572265625, -2.5987396240234375, 26.400299072265625, -27.735626220703125, 19.333740234375, 20.74383544921875, 33.9315185546875, 26.453125, 4.8994140625, 10.830366134643555, 0.35324859619140625, 24.84039306640625, 20.860595703125, -27.63568115234375, 48.2554931640625, -7.6793670654296875, 13.972900390625, 65.17832946777344, 22.217376708984375, 14.930122375488281, 21.054977416992188, 7.4368896484375, 28.01318359375, 20.31133270263672, -26.180313110351562, 74.8209228515625, 10.99554443359375, -7.305671691894531, 16.887237548828125, -9.989654541015625, 57.0498046875, 37.344512939453125, -10.664215087890625, -2.003398895263672, -3.1432037353515625, -25.58141326904297, 28.388729095458984, -5.101593017578125, 20.444541931152344, -11.932647705078125, 7.684967041015625, 19.23583221435547, 7.162555694580078, -6.79534912109375, 8.073532104492188, -8.905914306640625, 5.090572357177734, 2.3021392822265625, 16.332366943359375, 80.17333984375, 20.701858520507812, -10.852031707763672, -13.909027099609375, 23.15618896484375, 21.461273193359375, 19.50701904296875, -3.282012939453125, 65.1590576171875, -10.520965576171875, 10.320716857910156, 18.77520751953125, 48.9083251953125, 0.0, 6.125993728637695, 30.78167724609375, 48.73248291015625, -23.84139633178711, -1.409027099609375, 16.46063232421875, 5.6368408203125, -72.6627197265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000096.npy"}
{"epoch": 0.20104712041884817, "step": 97, "batch_size": 128, "mean": 11.961019515991211, "std": 23.093889236450195, "min": -51.836090087890625, "p10": -17.587381744384764, "median": 10.828643798828125, "p90": 43.15086669921874, "max": 74.6175537109375, "pos_frac": 0.7265625, "sample": [14.001251220703125, 22.10137939453125, 17.07257080078125, -13.76580810546875, -10.653579711914062, -33.8994140625, 23.79193115234375, 58.366180419921875, 38.2720947265625, 3.5229949951171875, -2.2430419921875, 22.176559448242188, 11.4368896484375, 6.827476501464844, 27.1529541015625, 4.419921875, 24.8822021484375, 0.0, 52.6380615234375, 26.119720458984375, 0.0, 12.734375, 19.428131103515625, -1.188507080078125, 11.70941162109375, 6.374492645263672, 9.753570556640625, 10.681625366210938, -16.68572235107422, -51.836090087890625, 14.416168212890625, 23.31707763671875, 9.029886245727539, 9.569873809814453, -10.769477844238281, 9.38503646850586, 40.66056442260742, -3.970458984375, -24.350921630859375, 8.877227783203125, 13.307876586914062, 1.2920761108398438, -15.42181396484375, 1.643035888671875, 18.441162109375, 5.67181396484375, 22.223663330078125, 46.71006774902344, 11.4449462890625, 41.9610595703125, -15.390129089355469, 54.440093994140625, 9.546577453613281, 64.12948608398438, 10.975662231445312, 4.920318603515625, 46.504730224609375, 11.449089050292969, 9.342557907104492, 74.6175537109375, 16.44988250732422, 42.2579345703125, -8.0411376953125, -25.417526245117188, 20.37286376953125, 34.24729919433594, -1.1596870422363281, 45.234375, 1.81298828125, 9.60784912109375, 8.4813232421875, 30.44012451171875, 32.945709228515625, -30.353363037109375, -0.9620361328125, -11.038528442382812, -12.173233032226562, 22.293197631835938, 10.20819091796875, -2.00506591796875, 28.8046875, 6.7058258056640625, 0.4208869934082031, 52.66864013671875, 2.1380538940429688, 52.15800476074219, -20.273666381835938, -8.352325439453125, 26.598190307617188, -8.97222900390625, 17.41884994506836, -29.742431640625, 8.309150695800781, -24.953506469726562, 2.92742919921875, 25.00970458984375, 53.95111083984375, 49.444671630859375, 1.4673652648925781, 3.22064208984375, -25.998992919921875, 27.01898193359375, 16.890380859375, 32.74254608154297, 5.682323455810547, 22.736473083496094, -28.101844787597656, 20.381393432617188, 29.081687927246094, 17.3482666015625, -16.132041931152344, 18.143842697143555, 18.037109375, -42.042724609375, 22.852886199951172, 34.86627197265625, 63.014251708984375, 18.4041748046875, -4.6522216796875, 26.3836669921875, 17.794952392578125, 4.3548583984375, 19.65252685546875, -19.691253662109375, 33.121612548828125, -23.730438232421875, -0.974273681640625, 34.509490966796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000097.npy"}
{"epoch": 0.2031413612565445, "step": 98, "batch_size": 128, "mean": 12.29484748840332, "std": 25.752153396606445, "min": -68.38958740234375, "p10": -13.551097106933593, "median": 9.349498748779297, "p90": 48.8784423828125, "max": 96.22036743164062, "pos_frac": 0.65625, "sample": [31.4893798828125, 59.416015625, 49.5426025390625, 13.058208465576172, -20.37420654296875, -7.88037109375, 7.146430969238281, -11.4429931640625, 9.949996948242188, -12.791259765625, -17.391525268554688, -3.1744117736816406, 2.21343994140625, 8.189556121826172, 49.4493408203125, 96.22036743164062, 57.96002197265625, -0.2081298828125, 37.574798583984375, 4.7833251953125, 7.3916015625, 24.453231811523438, 1.0708389282226562, 39.23614501953125, 9.282554626464844, 33.36834716796875, -13.700836181640625, 8.207263946533203, 8.218994140625, 7.232872009277344, -0.09091949462890625, 22.09893035888672, 75.64849853515625, 34.25506591796875, 37.44627380371094, -35.125732421875, -11.444345474243164, 57.358551025390625, 31.6458740234375, 11.030937194824219, 5.96044921875, -6.668304443359375, -10.263916015625, 15.369140625, -7.176460266113281, -0.868621826171875, 1.9369277954101562, -5.415435791015625, 21.853260040283203, -5.720367431640625, 32.306793212890625, 34.50982666015625, 8.41009521484375, 30.970489501953125, 69.4056396484375, 17.917449951171875, 36.537689208984375, 13.404045104980469, -0.05199432373046875, 3.1711158752441406, 2.5662078857421875, -6.1470947265625, 19.903060913085938, 13.385772705078125, 13.05731201171875, -5.575958251953125, 58.832061767578125, -33.82731628417969, 6.674041748046875, 10.5802001953125, -18.065078735351562, 16.796279907226562, -1.9454269409179688, -8.69803237915039, 9.558578491210938, -7.925872802734375, 63.2186279296875, 22.585174560546875, -14.81744384765625, 49.36199951171875, -17.406890869140625, -5.22857666015625, 45.053375244140625, 10.369110107421875, 9.41644287109375, -12.586700439453125, -17.273712158203125, 21.064315795898438, 29.692108154296875, 21.57574462890625, 52.53228759765625, 34.49957275390625, 28.597122192382812, -3.1031646728515625, 77.73980712890625, 33.750091552734375, -6.740814208984375, -0.6778564453125, 28.943405151367188, 24.608734130859375, -4.472564697265625, 48.67120361328125, 9.8079833984375, 27.832122802734375, -12.682373046875, -41.86126708984375, 3.8362293243408203, -68.38958740234375, -43.09532165527344, -6.923469543457031, -13.486923217773438, -5.600372314453125, 22.96209716796875, 16.1846923828125, 10.791595458984375, 32.55609130859375, 10.253799438476562, 23.226104736328125, 3.2147979736328125, -3.948394775390625, 25.93756103515625, -7.37384033203125, 27.04602813720703, 2.82867431640625, 26.10150146484375, 17.090744018554688, -32.904510498046875, 4.8939208984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000098.npy"}
{"epoch": 0.20523560209424083, "step": 99, "batch_size": 128, "mean": 12.684839248657227, "std": 28.314104080200195, "min": -55.318115234375, "p10": -22.813641357421872, "median": 11.587852478027344, "p90": 49.588861083984376, "max": 82.81173706054688, "pos_frac": 0.703125, "sample": [-50.210174560546875, -2.4731979370117188, -39.819580078125, 57.132049560546875, 60.84716796875, -0.20566177368164062, -9.887603759765625, 7.75146484375, 54.820770263671875, 39.45030975341797, 45.080169677734375, 10.374557495117188, 9.683929443359375, 17.724090576171875, -30.900863647460938, -53.34716796875, -15.2760009765625, 23.94720458984375, -44.58009338378906, 16.494171142578125, 18.64129638671875, -25.3599853515625, 4.304925918579102, -26.37884521484375, 22.360504150390625, 15.688179016113281, -9.2132568359375, 4.372108459472656, -42.52679443359375, 45.56890869140625, 7.923322677612305, 13.078880310058594, -2.8709335327148438, 9.240081787109375, 16.988922119140625, 65.42977905273438, -4.9801025390625, -4.845672607421875, 32.447021484375, 4.9864959716796875, 22.671119689941406, 14.794898986816406, 23.0665283203125, -55.318115234375, -50.609466552734375, 27.696014404296875, 13.164764404296875, -6.9052886962890625, 0.8435516357421875, 48.79656982421875, 4.145599365234375, 29.953155517578125, 44.478363037109375, 3.1085357666015625, 0.0, 5.416877746582031, 32.34375, 7.277711868286133, 49.75091552734375, -20.230743408203125, 14.845458984375, 82.81173706054688, -10.040069580078125, 1.0574722290039062, 0.30731201171875, -5.26629638671875, 34.41937255859375, -16.4654541015625, 41.44371032714844, 44.54248046875, 25.73230743408203, 3.24920654296875, 68.03228759765625, 37.15165710449219, 8.06964111328125, -12.207122802734375, 28.335933685302734, -17.831817626953125, 39.83062744140625, 22.256988525390625, 15.858062744140625, -3.7477970123291016, 6.76214599609375, 36.89044189453125, -8.07781982421875, 46.994903564453125, 1.1462783813476562, -28.112342834472656, 28.427459716796875, 11.035064697265625, -39.341705322265625, 7.339424133300781, 10.080421447753906, -18.38958740234375, 54.432952880859375, 10.065399169921875, -17.755279541015625, 16.174468994140625, -7.872344970703125, 19.32330322265625, 56.11161804199219, -21.72235107421875, 21.228374481201172, 0.07283210754394531, 13.520309448242188, 29.94403076171875, 46.03369140625, -1.190521240234375, 16.2257080078125, -13.028457641601562, 49.5194091796875, 1.2757492065429688, 25.905994415283203, 67.08242797851562, 18.276565551757812, -8.833572387695312, 57.5103759765625, 43.8170166015625, 63.940887451171875, 20.532394409179688, -25.736724853515625, 17.895614624023438, 35.24366760253906, 12.140640258789062, 40.8406982421875, 22.111541748046875, 63.99058532714844, 3.536895751953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000099.npy"}
{"epoch": 0.20732984293193718, "step": 100, "batch_size": 128, "mean": 12.528626441955566, "std": 25.46809959411621, "min": -43.72137451171875, "p10": -19.30957565307617, "median": 10.527774810791016, "p90": 47.364456176757805, "max": 80.1146240234375, "pos_frac": 0.6875, "sample": [13.7236328125, 18.50970458984375, 30.925735473632812, -0.1704559326171875, -2.04302978515625, 17.224136352539062, -8.25860595703125, 11.881103515625, -5.680133819580078, -9.694427490234375, -26.155929565429688, 57.57148742675781, 48.98040771484375, 31.076080322265625, -20.405189514160156, -12.273834228515625, 38.509857177734375, 4.6911468505859375, -15.235122680664062, -26.599578857421875, -3.643280029296875, 13.47507095336914, 2.1475582122802734, 24.389266967773438, -35.620086669921875, 22.157424926757812, 51.413482666015625, -31.3067626953125, 17.38890838623047, -18.84002685546875, 14.777923583984375, 15.433883666992188, 35.0262451171875, 2.37347412109375, -35.4273681640625, 44.01788330078125, 12.08917236328125, 46.671905517578125, 3.539642333984375, 38.27442932128906, 10.470672607421875, -3.350128173828125, 12.319549560546875, 3.1524658203125, -18.457015991210938, -5.185266494750977, 10.584877014160156, 37.95216369628906, 9.904937744140625, 17.251220703125, 24.879486083984375, 16.26019287109375, 71.37191772460938, 3.169586181640625, 63.49543762207031, 39.0401611328125, 16.72222900390625, 8.863515853881836, 6.246665954589844, -3.982696533203125, 0.2790546417236328, 16.8515625, 38.7501220703125, -24.942169189453125, -4.291961669921875, 13.724151611328125, -4.66180419921875, 11.168548583984375, 1.370635986328125, 15.419342041015625, 6.72802734375, 24.667236328125, -9.33502197265625, 7.6882171630859375, 21.434707641601562, 11.558380126953125, -27.62371826171875, -1.79913330078125, -43.72137451171875, -2.6736602783203125, 10.300857543945312, 6.415319442749023, 3.6974105834960938, -16.235870361328125, 32.084197998046875, -20.597747802734375, 38.102352142333984, 66.66688537597656, 39.114837646484375, 80.1146240234375, 25.49346923828125, 3.278289794921875, 2.435455322265625, 41.92138671875, 35.89251708984375, -16.381301879882812, 2.456512451171875, 32.3912353515625, 24.987060546875, 68.13119506835938, -1.9646720886230469, 22.525146484375, 29.25802230834961, 54.9140625, -12.920951843261719, 59.074005126953125, -26.12841796875, -10.881103515625, 3.9970626831054688, 56.3154296875, -15.063400268554688, 26.776763916015625, -24.424423217773438, -2.1351776123046875, 29.415924072265625, 16.508098602294922, 8.253210067749023, 22.16741943359375, 62.935333251953125, 6.086273193359375, -12.486221313476562, 6.46978759765625, 69.61672973632812, 16.305686950683594, 28.687965393066406, -5.0602874755859375, 31.191162109375, -32.25474548339844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000100.npy"}
{"epoch": 0.2094240837696335, "step": 101, "batch_size": 128, "mean": 14.8983154296875, "std": 27.514724731445312, "min": -54.888458251953125, "p10": -22.028683471679685, "median": 10.932754516601562, "p90": 49.367428588867185, "max": 96.8245849609375, "pos_frac": 0.7421875, "sample": [41.4013671875, -54.888458251953125, 8.561904907226562, 24.76934814453125, 7.828418731689453, 2.6542816162109375, 33.73834228515625, 12.832412719726562, 6.7781219482421875, 8.412971496582031, -2.248476028442383, 36.67791748046875, 1.1547775268554688, -22.861557006835938, 0.3976287841796875, 39.093231201171875, -10.54638671875, -8.675485610961914, 13.764579772949219, 40.60520935058594, 5.9088134765625, 58.68548583984375, 32.088623046875, 22.400558471679688, -45.2969970703125, 32.41668701171875, -17.720117568969727, 32.53118896484375, 48.9945068359375, -2.10919189453125, 3.4191970825195312, 46.66706848144531, 28.284927368164062, -28.462890625, 32.18895721435547, 1.50823974609375, 63.17634582519531, 26.1553955078125, 35.008872985839844, 0.92095947265625, 53.90093994140625, 30.233078002929688, 57.844024658203125, 30.458175659179688, -35.736724853515625, 23.485565185546875, 58.3648681640625, 44.8856201171875, 5.566963195800781, 56.36979675292969, 9.692581176757812, 12.065608978271484, -32.3253173828125, -41.432037353515625, 25.165159225463867, 8.264892578125, -2.705291748046875, 0.430908203125, 27.1617431640625, 31.05695343017578, 10.725006103515625, -15.244476318359375, -25.70916748046875, 18.670700073242188, -12.307861328125, 29.565093994140625, 43.17132568359375, 42.0826416015625, -16.598297119140625, -2.25653076171875, 30.90570068359375, 47.60691833496094, 5.018529891967773, 39.28973388671875, 16.919586181640625, 16.303421020507812, 64.30819702148438, -40.253143310546875, -4.296224594116211, 10.085441589355469, 7.06103515625, 5.9633636474609375, 9.344594955444336, 3.5069522857666016, 29.737594604492188, -10.135498046875, 8.87762451171875, 20.178726196289062, 6.6928558349609375, 5.6827392578125, 37.801246643066406, -1.8677940368652344, -27.226654052734375, -1.569244384765625, -21.671737670898438, -0.7032527923583984, 12.8216552734375, 57.758453369140625, 96.8245849609375, 62.039306640625, 40.82347106933594, 34.676971435546875, 7.523040771484375, 35.41717529296875, 0.21588134765625, 64.6246337890625, -2.939361572265625, 14.525421142578125, -23.167190551757812, 4.37371826171875, 11.1405029296875, 32.935157775878906, 50.237579345703125, -1.5193252563476562, 3.2877578735351562, -46.02044677734375, 95.244384765625, 1.8988800048828125, 22.716400146484375, -2.6029815673828125, 9.093315124511719, 31.84893798828125, 14.1961669921875, 16.196853637695312, -26.395130157470703, -1.5820884704589844, 24.7310791015625, 17.43614959716797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000101.npy"}
{"epoch": 0.21151832460732983, "step": 102, "batch_size": 128, "mean": 14.84056282043457, "std": 26.237537384033203, "min": -78.53158569335938, "p10": -17.10955810546875, "median": 14.637237548828125, "p90": 44.89972381591796, "max": 65.76715087890625, "pos_frac": 0.71875, "sample": [-0.0010833740234375, -15.8265380859375, 42.48481750488281, 6.902851104736328, 10.581039428710938, 42.11384582519531, -13.911468505859375, 43.22662353515625, 36.653411865234375, -16.70062255859375, 26.771896362304688, 13.989677429199219, -38.044189453125, -64.57289123535156, 33.903839111328125, -30.288619995117188, 15.3621826171875, -16.728195190429688, -27.17950439453125, 7.206573486328125, 14.90667724609375, 3.4036941528320312, 14.3677978515625, 51.637603759765625, -5.034257888793945, 39.1630859375, 0.0, 3.890350341796875, 12.622451782226562, 6.146520614624023, 29.74481201171875, -26.14971923828125, 17.8109130859375, 54.765594482421875, 42.26194763183594, -12.484678268432617, 20.9461669921875, -3.7416610717773438, 8.881355285644531, -2.8577346801757812, -17.999404907226562, 65.70585632324219, 27.640167236328125, 54.911651611328125, 19.93096923828125, 8.5089111328125, 26.674835205078125, 23.686187744140625, -14.238052368164062, 0.555450439453125, 38.43053436279297, 31.75312042236328, 38.435882568359375, 13.535781860351562, 22.77796173095703, 55.05535888671875, -2.3310699462890625, 53.176971435546875, 0.0, 25.08660888671875, 2.7418212890625, 1.0609130859375, 65.76715087890625, -0.4945220947265625, 12.873565673828125, 50.116455078125, 18.657501220703125, 13.951305389404297, -26.42822265625, -3.3329696655273438, 2.9258975982666016, 13.314346313476562, 35.177215576171875, -7.082916259765625, 6.694797515869141, -25.31243896484375, 28.33831787109375, 62.0670166015625, 19.4024658203125, 31.970611572265625, 32.31396484375, 42.85527038574219, 16.644716262817383, 43.80108642578125, 21.45587158203125, 26.921401977539062, -0.52978515625, 32.54631423950195, 14.98626708984375, 1.1419830322265625, 20.9368896484375, 25.844223022460938, 53.2955322265625, 43.8402099609375, 27.014892578125, 40.6072998046875, 44.03271484375, 9.1275634765625, 1.4035186767578125, 35.24501037597656, 10.288055419921875, 13.020469665527344, 23.222747802734375, -22.82135009765625, 7.47369384765625, -3.7600250244140625, -20.36578369140625, 46.92274475097656, 14.251075744628906, -12.982696533203125, 36.348358154296875, 40.15046691894531, 19.852005004882812, 30.322952270507812, 59.16534423828125, -35.95880126953125, -4.7370758056640625, 28.943801879882812, -78.53158569335938, 40.41375732421875, -2.989349365234375, 1.3115234375, 62.19891357421875, -2.227630615234375, 15.752273559570312, 43.48883056640625, -7.140228271484375, -35.43585205078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000102.npy"}
{"epoch": 0.2136125654450262, "step": 103, "batch_size": 128, "mean": 13.621286392211914, "std": 26.896533966064453, "min": -53.89044189453125, "p10": -12.145414733886719, "median": 7.752891540527344, "p90": 53.74099426269531, "max": 91.27578735351562, "pos_frac": 0.734375, "sample": [-34.63775634765625, -3.6939239501953125, 73.924560546875, 30.895034790039062, 3.3409957885742188, -9.80230712890625, 8.9083251953125, 0.7932090759277344, -19.5911865234375, -16.180633544921875, -12.2945556640625, 15.126007080078125, 33.6533203125, 74.61122131347656, 53.56903076171875, 16.366943359375, 5.732032775878906, 54.142242431640625, 12.486083984375, 50.47894287109375, 87.48141479492188, 12.4974365234375, 0.3813323974609375, -31.885086059570312, 37.49249267578125, 41.641265869140625, 2.1811485290527344, 40.524322509765625, 15.306137084960938, 39.835235595703125, -6.71575927734375, 1.578695297241211, 56.46478271484375, 11.241357803344727, -11.806861877441406, 1.0950469970703125, -0.32878875732421875, 56.072967529296875, 45.18133544921875, 24.832130432128906, 7.372528076171875, 0.1259002685546875, 9.796371459960938, -42.19598388671875, 31.1671142578125, -30.24017333984375, 8.580963134765625, 4.773590087890625, 22.655776977539062, 23.676651000976562, 6.457679748535156, 8.836868286132812, 0.0, 7.6659698486328125, 6.932487487792969, 38.99090576171875, -1.9258918762207031, 7.839813232421875, 14.63421630859375, -6.1544342041015625, 27.236984252929688, 18.865478515625, 4.79156494140625, 39.16119384765625, 85.6500244140625, 6.223175048828125, -1.713470458984375, 41.1595458984375, 0.8098354339599609, 62.62123107910156, -17.986602783203125, 82.46133422851562, 4.8698577880859375, 14.3155517578125, 13.615707397460938, 11.89569091796875, -0.9794464111328125, 25.1068115234375, 30.86669921875, 27.620452880859375, 1.7053680419921875, -11.56646728515625, -9.988143920898438, -1.0260086059570312, 3.489654541015625, 58.61431884765625, -18.32025146484375, 6.50189208984375, 52.69805908203125, -11.80718994140625, 7.6295928955078125, 8.96832275390625, -0.43145751953125, 12.047771453857422, -1.739898681640625, -11.278106689453125, 6.5078582763671875, 6.787506103515625, 0.0, 12.04638671875, 1.6254310607910156, 62.59773254394531, 20.33331298828125, 23.128082275390625, 19.422012329101562, 14.687446594238281, 5.49847412109375, 8.758834838867188, 91.27578735351562, 8.9349365234375, 8.773994445800781, 56.63502502441406, -17.14529800415039, -53.89044189453125, 4.34869384765625, -8.386825561523438, -12.081497192382812, 33.235389709472656, 35.59519958496094, 13.392181396484375, 25.683990478515625, 4.4356536865234375, 1.7330589294433594, 1.9855422973632812, -8.060043334960938, -49.706390380859375, -20.467437744140625, 3.8624267578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000103.npy"}
{"epoch": 0.2157068062827225, "step": 104, "batch_size": 128, "mean": 10.00366497039795, "std": 26.9212646484375, "min": -57.796112060546875, "p10": -22.89835510253906, "median": 8.36760139465332, "p90": 45.09170684814453, "max": 92.72927856445312, "pos_frac": 0.6640625, "sample": [6.85235595703125, -4.32659912109375, 11.90634536743164, 50.86944580078125, -24.382598876953125, 2.5960693359375, 16.890533447265625, -39.38885498046875, 73.09420776367188, 52.237548828125, 9.39697265625, -10.725349426269531, 0.6108856201171875, -11.007080078125, -33.492706298828125, 5.6929931640625, 4.15924072265625, 21.710819244384766, -3.2564315795898438, 24.79534912109375, -45.854339599609375, 2.853242874145508, 41.759674072265625, 26.05645751953125, 11.6497802734375, -44.51165771484375, -12.580780029296875, 31.654312133789062, 8.400875091552734, 17.788978576660156, 5.8192138671875, 17.027023315429688, -55.887939453125, 12.688911437988281, 12.528305053710938, 9.456050872802734, 5.961179733276367, -1.9936447143554688, 55.939483642578125, 5.83221435546875, -1.5908355712890625, 0.3125267028808594, 13.278244018554688, -49.1978759765625, 28.184585571289062, -14.069122314453125, 57.475189208984375, 12.095752716064453, 14.770957946777344, -36.82240295410156, 33.80351257324219, 30.251678466796875, 44.94981384277344, 47.2945556640625, -11.294975280761719, 37.0242919921875, 33.43121337890625, -11.684043884277344, 4.640712738037109, 32.99824523925781, 29.728057861328125, -29.495010375976562, 1.056884765625, 6.990478515625, 46.4532470703125, -1.373077392578125, 53.778106689453125, -14.83111572265625, 33.0123291015625, 15.06793212890625, -11.281387329101562, -10.768402099609375, 19.083251953125, -22.587615966796875, 2.5472412109375, -1.6685829162597656, 36.618194580078125, -13.030006408691406, -12.573883056640625, 0.039920806884765625, 16.52724838256836, -57.796112060546875, 74.74383544921875, -3.7692718505859375, -23.6234130859375, -9.021415710449219, -26.957763671875, 3.6060943603515625, -6.62432861328125, 34.403564453125, 92.72927856445312, -3.177337646484375, -22.394210815429688, 33.4793701171875, 1.340555191040039, -2.440338134765625, 11.59588623046875, 2.6668548583984375, 33.953033447265625, 20.284042358398438, -17.316619873046875, 22.543701171875, -13.265975952148438, 33.33929443359375, 10.398153305053711, 45.42279052734375, 14.403564453125, 26.871856689453125, -17.637622833251953, 31.57984161376953, 4.468414306640625, 8.334327697753906, 40.610931396484375, 43.43309783935547, 39.73802947998047, 48.714508056640625, -7.098052978515625, -28.56842041015625, 0.0, 26.13055419921875, 21.286224365234375, 20.766021728515625, -2.9885330200195312, 6.2281341552734375, 58.137939453125, 8.59503173828125, 21.829238891601562, 13.548072814941406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000104.npy"}
{"epoch": 0.21780104712041884, "step": 105, "batch_size": 128, "mean": 20.60167694091797, "std": 28.652568817138672, "min": -74.92694091796875, "p10": -12.713693237304687, "median": 18.978445053100586, "p90": 53.02577667236328, "max": 104.2991943359375, "pos_frac": 0.7890625, "sample": [12.40875244140625, 19.021011352539062, 50.53303527832031, 8.31527328491211, 1.5229949951171875, 56.67549133300781, 7.0711822509765625, 38.05804443359375, 20.12469482421875, -11.486465454101562, 46.62567138671875, 52.831298828125, 42.61181640625, 74.38433837890625, -3.1121673583984375, 40.57798767089844, 46.916259765625, 15.721099853515625, 46.7310791015625, 2.73883056640625, 18.939952850341797, -13.337493896484375, 10.67486572265625, 0.0, 14.4664306640625, 18.026519775390625, 36.94232177734375, 10.1053466796875, 1.0396804809570312, 34.66941833496094, 46.72174072265625, 25.273681640625, -5.415771484375, 21.37225341796875, 46.208648681640625, 41.89891815185547, 47.66180419921875, -6.292236328125, 44.684326171875, -28.394287109375, 2.065826416015625, 6.40155029296875, 15.237091064453125, 12.642936706542969, 21.017059326171875, 0.92779541015625, -6.824432373046875, 22.071640014648438, 15.65606689453125, -8.25799560546875, 25.2200927734375, -62.16729736328125, 7.23832893371582, 17.510345458984375, -5.274181365966797, 15.345855712890625, 16.656402587890625, 30.650146484375, -34.41094970703125, 53.277862548828125, -14.788604736328125, 41.20623779296875, 13.020469665527344, -74.92694091796875, 25.874710083007812, -5.9795074462890625, 33.488067626953125, 9.855239868164062, 10.8260498046875, 58.86590576171875, 41.795196533203125, 49.567169189453125, 55.175201416015625, 37.79510498046875, -47.67083740234375, 5.5951385498046875, 16.015594482421875, 43.29559326171875, 15.147567749023438, -24.5316162109375, 22.78158950805664, 48.784942626953125, 19.016937255859375, -12.44635009765625, 40.64826965332031, 7.5804595947265625, 68.16329956054688, 54.70166015625, 25.96575164794922, -41.092987060546875, 14.395597457885742, 52.91773986816406, 36.272735595703125, 64.71265411376953, 20.985137939453125, 9.055206298828125, 48.958648681640625, 46.1849365234375, -0.611358642578125, 63.713287353515625, 2.55438232421875, 30.288238525390625, 37.3895263671875, -17.94854736328125, 2.1126251220703125, 62.9200439453125, 38.22306823730469, 13.459686279296875, -3.7686767578125, -16.39813232421875, -5.138702392578125, 34.475860595703125, 46.961669921875, 45.563385009765625, 104.2991943359375, -3.2880706787109375, 32.29833984375, 17.368934631347656, 30.77703857421875, 79.19258117675781, 58.719085693359375, -28.375335693359375, 7.5531005859375, 0.2943572998046875, -23.793212890625, 49.70916748046875, 42.84693908691406, 39.90362548828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000105.npy"}
{"epoch": 0.2198952879581152, "step": 106, "batch_size": 128, "mean": 12.51746940612793, "std": 30.40545082092285, "min": -71.47273254394531, "p10": -24.693888854980468, "median": 11.530975341796875, "p90": 48.86615600585936, "max": 82.52792358398438, "pos_frac": 0.640625, "sample": [2.9916229248046875, -4.88787841796875, 20.942169189453125, 17.619140625, 11.618499755859375, 11.443450927734375, 2.413116455078125, -10.329856872558594, 34.57391357421875, -16.632049560546875, -19.215591430664062, -9.623931884765625, 38.82609558105469, 31.4912109375, 3.2672119140625, 3.3441162109375, 12.4796142578125, -11.902412414550781, -2.594146728515625, -1.937255859375, 74.19970703125, 29.556137084960938, 7.71380615234375, 46.71332550048828, 8.824371337890625, 82.52792358398438, 19.331954956054688, 10.397254943847656, 71.15325927734375, -1.4239177703857422, -8.898468017578125, 26.532302856445312, 10.621185302734375, 47.385986328125, -11.662384033203125, 18.235153198242188, 44.999755859375, -38.23316955566406, 18.190322875976562, 38.478607177734375, 22.70539093017578, -5.455108642578125, 31.582244873046875, 42.21612548828125, -15.276575088500977, 63.309234619140625, -69.05877685546875, -59.13677978515625, 52.87158203125, -24.936126708984375, 41.86268615722656, 30.55584716796875, 71.56539916992188, -32.489532470703125, -2.6202163696289062, -12.443778991699219, -8.132568359375, 18.19085693359375, 68.26239013671875, 4.190620422363281, 13.4832763671875, -17.542007446289062, 46.93328857421875, 15.8492431640625, -9.599212646484375, 31.89764404296875, -37.523651123046875, -4.781097412109375, 44.079559326171875, -23.162078857421875, 28.546592712402344, -33.2579345703125, 22.539764404296875, 12.734024047851562, 28.256986618041992, -13.960372924804688, 38.019012451171875, 73.54388427734375, 74.51777648925781, 42.635162353515625, -15.373207092285156, 20.27203369140625, -31.535064697265625, 2.2150955200195312, 21.478622436523438, 11.251644134521484, 46.05390930175781, -25.49859619140625, 42.701080322265625, -35.33154296875, 33.42869567871094, -9.016075134277344, 45.52667236328125, 26.940521240234375, 0.95330810546875, 28.80712890625, 0.191131591796875, 53.64093017578125, 58.535400390625, 57.519134521484375, -14.209793090820312, 28.5821533203125, -24.590072631835938, -0.912689208984375, 29.916351318359375, 52.31988525390625, 2.741424560546875, 21.75847625732422, -0.49713134765625, -4.1427764892578125, -44.38623046875, -71.47273254394531, -15.200836181640625, -18.465866088867188, 25.73492431640625, 12.365219116210938, 9.1986083984375, 9.764556884765625, -31.733245849609375, -13.92034912109375, 28.64111328125, 3.199676513671875, -0.5427932739257812, 23.966758728027344, 42.23991394042969, 18.706336975097656, 41.50285339355469, -0.591400146484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000106.npy"}
{"epoch": 0.22198952879581152, "step": 107, "batch_size": 128, "mean": 19.731876373291016, "std": 28.83608055114746, "min": -103.38604736328125, "p10": -11.098133087158201, "median": 16.695526123046875, "p90": 57.84939270019531, "max": 91.71334838867188, "pos_frac": 0.7421875, "sample": [15.312347412109375, -8.52020263671875, 9.912277221679688, 16.991249084472656, 10.5303955078125, 18.808837890625, 52.29071044921875, 50.36077880859375, 28.851043701171875, -18.26763916015625, 30.71527099609375, 66.5858154296875, 31.47711181640625, 10.970733642578125, 11.588008880615234, -1.8165473937988281, 0.5157470703125, -17.99913787841797, 12.15008544921875, 57.640899658203125, 28.59814453125, 1.67822265625, 11.0723876953125, 6.96807861328125, 44.2237548828125, 27.591018676757812, 5.307403564453125, -10.350616455078125, 16.7340087890625, 5.635772705078125, -5.223876953125, 36.69110107421875, 2.373992919921875, -8.3834228515625, 32.164794921875, 58.33587646484375, 27.94537353515625, 17.18317413330078, 91.71334838867188, 4.9836883544921875, -3.5184860229492188, 51.14988708496094, 36.01411437988281, 11.526275634765625, 22.206893920898438, 35.169586181640625, 37.60797119140625, -12.218772888183594, 46.56813049316406, 65.43478393554688, -19.17779541015625, 59.865264892578125, -103.38604736328125, -21.79168701171875, 21.03533935546875, 60.09747314453125, 16.65704345703125, 18.857284545898438, 33.18536376953125, -16.32946014404297, 29.351669311523438, -3.10723876953125, -9.0245361328125, 1.45806884765625, 14.224151611328125, 24.265823364257812, -27.086746215820312, 8.433120727539062, 42.60345458984375, -0.82257080078125, 5.7535400390625, 40.067169189453125, -13.90789794921875, 44.783905029296875, 23.80615234375, -25.813140869140625, 1.613800048828125, 30.318374633789062, 39.391510009765625, 37.75425720214844, 0.782928466796875, 54.699951171875, 75.91961669921875, -3.025299072265625, 23.375022888183594, -3.4121932983398438, 90.57768249511719, 5.08966064453125, 91.54595947265625, -0.279510498046875, 58.5205078125, 53.632537841796875, -10.61785888671875, 26.642837524414062, -8.8026123046875, 39.211029052734375, 24.75615692138672, 49.81700134277344, 3.0464401245117188, -4.7818603515625, 19.82489013671875, 8.048566818237305, -13.11468505859375, 18.457916259765625, 49.99224853515625, -15.913993835449219, 69.68887329101562, 4.306571960449219, 3.93670654296875, 8.59735107421875, -1.25244140625, 34.70269775390625, 2.0125732421875, 55.825103759765625, 39.52253723144531, 32.960269927978516, 38.896942138671875, -0.1734600067138672, 62.68943786621094, -18.13642120361328, -1.278045654296875, 43.92755126953125, -6.1192169189453125, 2.3740482330322266, -1.588348388671875, 76.55686950683594, 11.409996032714844, 56.4716796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000107.npy"}
{"epoch": 0.22408376963350785, "step": 108, "batch_size": 128, "mean": 18.321537017822266, "std": 32.722496032714844, "min": -72.6871337890625, "p10": -22.17826461791992, "median": 14.7349853515625, "p90": 60.35672607421874, "max": 116.388427734375, "pos_frac": 0.7578125, "sample": [52.87750244140625, -18.83489990234375, -5.7792816162109375, 24.002395629882812, 36.656005859375, 16.559921264648438, 90.2490234375, 1.597320556640625, 67.78604125976562, -5.6620025634765625, -5.424205780029297, 30.161773681640625, -32.13671875, 85.777099609375, 36.561668395996094, 17.07756805419922, -71.3826904296875, -37.57122802734375, 43.28021240234375, 18.21464729309082, 74.26971435546875, 50.458648681640625, 84.12109375, 62.65069580078125, 20.2713623046875, -26.809825897216797, 76.7034912109375, 36.86517333984375, 34.039642333984375, 12.135992050170898, 18.441452026367188, 53.179290771484375, 49.16168975830078, 11.51025390625, 14.338951110839844, 16.786705017089844, 37.124755859375, 22.1878662109375, 0.681854248046875, 0.5220909118652344, 5.60736083984375, 21.14508056640625, 26.539306640625, -6.25347900390625, 33.03038024902344, 112.23077392578125, 9.070106506347656, 13.496185302734375, 15.131019592285156, 27.396575927734375, 44.069000244140625, 24.39739990234375, 10.662498474121094, 5.927764892578125, 8.704147338867188, 0.0, 20.506851196289062, 0.3414459228515625, 39.414459228515625, 3.6933670043945312, 57.35308837890625, 14.2198486328125, 7.420433044433594, 17.10126495361328, 0.08116912841796875, 57.86601257324219, 0.1953582763671875, -11.49151611328125, 1.7066268920898438, -29.255081176757812, 5.48486328125, -25.093505859375, 38.26812744140625, 17.571212768554688, 70.2071533203125, -21.431549072265625, 0.0, 40.190826416015625, 2.7709083557128906, 1.4658222198486328, -2.785186767578125, -3.0955810546875, -11.549163818359375, 43.00347900390625, -1.5568084716796875, 12.807060241699219, 1.5613479614257812, 67.09359741210938, 4.34027099609375, 45.47711181640625, 3.920135498046875, 40.8126220703125, -27.7117919921875, -14.912155151367188, -23.92060089111328, 7.057342529296875, -25.75524139404297, 38.092620849609375, 1.7832183837890625, -34.78937530517578, 19.948516845703125, 59.097625732421875, 35.317138671875, -42.94073486328125, 0.0, -43.8751220703125, 41.693782806396484, -1.839141845703125, 0.6744480133056641, 67.69354248046875, 33.00483703613281, 11.287872314453125, 59.37359619140625, -5.945858001708984, 116.388427734375, 55.186248779296875, 15.493606567382812, 75.264404296875, 38.84783935546875, -72.6871337890625, -4.2045745849609375, 19.483062744140625, 10.98567008972168, 10.47906494140625, 20.556198120117188, 7.273193359375, 24.473663330078125, 21.861419677734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000108.npy"}
{"epoch": 0.2261780104712042, "step": 109, "batch_size": 128, "mean": 20.35159683227539, "std": 32.687232971191406, "min": -49.0118408203125, "p10": -17.27373046875, "median": 15.727081298828125, "p90": 66.18428955078124, "max": 126.9952392578125, "pos_frac": 0.765625, "sample": [-3.9477081298828125, -9.246566772460938, -27.72736358642578, 44.7354736328125, 48.535552978515625, 72.88897705078125, 32.237335205078125, 56.238731384277344, -13.425521850585938, 7.57623291015625, 68.8822021484375, 0.4025688171386719, 25.33521270751953, 11.433265686035156, 67.21029663085938, 2.88720703125, 19.37646484375, 9.535835266113281, -38.655517578125, 21.769775390625, 1.0268783569335938, 126.9952392578125, -13.833419799804688, 30.3533935546875, 7.17919921875, 38.935028076171875, 54.456787109375, 21.72728729248047, -17.275482177734375, -8.235454559326172, 46.006492614746094, -2.16778564453125, 62.07562255859375, 17.3121337890625, 28.54937744140625, 5.652740478515625, 38.33274841308594, 69.07101440429688, 0.17723846435546875, 26.32025146484375, 10.413780212402344, 14.52435302734375, -21.923492431640625, -1.449859619140625, 30.19677734375, 23.211700439453125, -46.268310546875, 14.32476806640625, 19.25951385498047, 24.40648651123047, 22.995361328125, -21.847076416015625, -17.272979736328125, 76.23480224609375, 0.0, 17.691551208496094, 5.92315673828125, 8.655487060546875, 39.74891662597656, 1.0864601135253906, 38.592071533203125, 54.515625, -47.39599609375, -49.0118408203125, -43.462066650390625, 16.03485107421875, 24.9625244140625, 118.154541015625, 41.652587890625, 40.32958984375, 8.523117065429688, -1.7287578582763672, -15.0947265625, 14.351547241210938, 2.201904296875, 11.022003173828125, 31.7669677734375, 2.982421875, 35.616798400878906, 46.386993408203125, -31.3612060546875, 22.3338623046875, 32.705997467041016, 43.33245849609375, 69.63742065429688, 12.841583251953125, 62.613037109375, 30.924835205078125, -20.4158935546875, 88.262939453125, 45.597015380859375, 45.376953125, -32.26234436035156, 37.61541748046875, 23.414169311523438, 61.94163513183594, 1.7311782836914062, -16.13836669921875, -11.662887573242188, 4.9593505859375, 22.258636474609375, 75.79714965820312, 2.1601715087890625, 69.02252197265625, -0.6121177673339844, 37.08210754394531, 16.51043701171875, 2.4294891357421875, 65.815185546875, 13.789134979248047, -4.419097900390625, 34.28387451171875, 56.223236083984375, -29.988037109375, 0.8511161804199219, 67.0455322265625, -0.29461669921875, 45.563323974609375, 6.138916015625, 11.642608642578125, 9.249237060546875, 104.0250244140625, 5.573799133300781, 15.4193115234375, 13.21478271484375, -16.6231689453125, 9.190185546875, 43.19549560546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000109.npy"}
{"epoch": 0.22827225130890053, "step": 110, "batch_size": 128, "mean": 19.139202117919922, "std": 34.575531005859375, "min": -76.03192138671875, "p10": -20.394855499267578, "median": 16.128753662109375, "p90": 66.21473388671873, "max": 112.226806640625, "pos_frac": 0.734375, "sample": [13.658126831054688, -76.03192138671875, -31.920307159423828, 61.45635986328125, 83.638916015625, 3.726612091064453, 71.73992919921875, 16.221649169921875, 21.90509033203125, 20.122100830078125, 47.0213623046875, 2.31793212890625, 13.146690368652344, 4.617347717285156, 49.32438659667969, -51.25099182128906, -2.386199951171875, 40.945556640625, -34.66949462890625, 57.842254638671875, 43.815185546875, 3.474151611328125, 4.778602600097656, 51.49267578125, 0.0, -15.21337890625, 42.94854736328125, 16.279693603515625, -0.28697967529296875, 5.8068695068359375, 27.471298217773438, 75.30743408203125, 6.894878387451172, 11.172752380371094, -17.85284423828125, 10.190032958984375, 57.58525085449219, 52.60560607910156, 64.475341796875, -3.202911376953125, -20.92835235595703, 26.5972900390625, -1.9146575927734375, 44.231536865234375, 21.976318359375, 14.404937744140625, 45.694610595703125, 3.796142578125, 52.67047119140625, -23.660491943359375, 0.4352550506591797, 27.560791015625, 75.3929443359375, 4.582729339599609, 9.944517135620117, 73.84469604492188, -35.9125862121582, 42.383941650390625, -0.3438453674316406, -46.01094055175781, 82.889404296875, 4.130645751953125, 43.304901123046875, 32.94085693359375, -17.08673095703125, 98.40628051757812, 27.6790771484375, 4.460662841796875, 16.34234619140625, 2.663330078125, -20.166213989257812, 36.64703369140625, 23.488037109375, 9.22323989868164, -17.26656723022461, 40.47777557373047, 9.524703979492188, -54.735015869140625, 35.84283447265625, 112.226806640625, 17.758617401123047, 32.547393798828125, 19.100540161132812, 20.791046142578125, -62.5841064453125, 36.6451416015625, 10.2774658203125, -11.982742309570312, 63.67864990234375, 9.380271911621094, 31.678924560546875, 77.98080444335938, -6.3388824462890625, 78.64614868164062, 13.9727783203125, 16.526622772216797, -0.9296512603759766, 43.75146484375, -1.37725830078125, -6.864227294921875, -18.112548828125, 10.813720703125, 25.948471069335938, -30.89105224609375, 16.0157470703125, -7.0114898681640625, 87.97964477539062, 63.077362060546875, 1.35321044921875, 16.035858154296875, 35.079139709472656, -0.31195068359375, -4.20941162109375, -27.927383422851562, -47.90406799316406, 42.76043701171875, 70.2733154296875, 53.05924987792969, 1.497528076171875, 82.16015625, 20.531082153320312, 22.90966796875, 21.60956573486328, 32.65350341796875, 2.4158935546875, 26.255264282226562, -19.667831420898438, 55.859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000110.npy"}
{"epoch": 0.23036649214659685, "step": 111, "batch_size": 128, "mean": 20.352733612060547, "std": 36.0838737487793, "min": -83.3123779296875, "p10": -22.59910888671875, "median": 17.386821746826172, "p90": 60.98432922363279, "max": 117.24514770507812, "pos_frac": 0.75, "sample": [2.2509231567382812, -19.114013671875, 48.955352783203125, -49.439849853515625, -8.751388549804688, 102.27743530273438, -8.74957275390625, 5.018280029296875, 19.396705627441406, 71.64218139648438, -36.304969787597656, -16.498367309570312, 66.06845092773438, 48.605987548828125, 3.453521728515625, 117.24514770507812, 6.976806640625, -40.26982116699219, 2.91058349609375, 50.5770263671875, 10.934103012084961, 1.3355674743652344, 89.52389526367188, 23.797637939453125, 7.379127502441406, 42.0789794921875, -27.994964599609375, 2.7122802734375, 58.805419921875, 58.5504150390625, 68.26593017578125, 35.819091796875, 115.1741943359375, -1.9250907897949219, 50.60595703125, 83.48764038085938, -13.855010986328125, 42.566619873046875, 0.858642578125, 69.33961486816406, 20.274169921875, 29.493040084838867, 26.322998046875, 28.330322265625, -52.6866455078125, 3.01995849609375, 4.64764404296875, 16.2928466796875, 40.70672607421875, 7.111541748046875, 24.98052978515625, 3.7179718017578125, 32.04979705810547, 7.329254150390625, -52.680023193359375, -83.3123779296875, 19.32952880859375, -24.370742797851562, -13.817989349365234, -0.28765869140625, 53.607666015625, -46.639373779296875, 44.1533203125, 18.53414535522461, -2.6387367248535156, 7.5623321533203125, 72.9769287109375, 1.814697265625, 18.480796813964844, 26.064468383789062, -5.98101806640625, 53.405548095703125, 36.74737548828125, 6.09124755859375, 9.992935180664062, 55.01847839355469, 8.7947998046875, 55.12980651855469, -10.03155517578125, 96.99017333984375, 14.144180297851562, 14.447769165039062, 57.631103515625, 5.749996185302734, -41.20513916015625, 53.5361328125, -19.5150146484375, 38.45977783203125, 57.7147216796875, 50.163421630859375, 56.94786071777344, 38.50103759765625, 27.313858032226562, -35.7623291015625, 29.089813232421875, 12.7735595703125, 19.263702392578125, 21.4725341796875, -7.04150390625, 56.48630142211914, 7.443328857421875, 1.09326171875, 111.39620971679688, -8.815032958984375, -8.252861022949219, 38.706321716308594, 29.61414337158203, 43.7335205078125, 20.8577880859375, 9.637832641601562, 12.056951522827148, 23.767181396484375, -2.4783935546875, 45.424407958984375, 3.94976806640625, -27.41851806640625, 10.926025390625, 31.814773559570312, 81.42236328125, -5.1278076171875, 49.62477111816406, -1.466461181640625, 50.17106628417969, 31.90966796875, 24.988494873046875, 5.310966491699219, -22.3375244140625, -23.20947265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000111.npy"}
{"epoch": 0.2324607329842932, "step": 112, "batch_size": 128, "mean": 13.163862228393555, "std": 39.15119171142578, "min": -83.15179443359375, "p10": -35.93384933471679, "median": 8.61333179473877, "p90": 64.17660827636718, "max": 115.0223388671875, "pos_frac": 0.6171875, "sample": [48.594573974609375, 7.738014221191406, -8.964004516601562, 18.559661865234375, -47.8603515625, 17.3763427734375, 20.586143493652344, -51.90589904785156, 51.772918701171875, 57.670440673828125, 0.22306060791015625, 46.613494873046875, 80.27610778808594, -28.434478759765625, 15.9320068359375, -2.999725341796875, 14.3836669921875, -24.563796997070312, 56.309722900390625, 70.50257873535156, -30.35244369506836, -1.5201854705810547, -60.765899658203125, -10.118293762207031, -9.63250732421875, 46.971527099609375, 15.125564575195312, -0.005260467529296875, 3.3889312744140625, 45.52642822265625, 29.960800170898438, 4.0729217529296875, -3.7893524169921875, 26.128219604492188, -5.677978515625, -76.46453857421875, -4.76055908203125, 25.78179168701172, 7.574859619140625, -8.984355926513672, -38.556793212890625, 35.89605712890625, 21.46307373046875, 54.177703857421875, -3.7225341796875, 3.7652587890625, -12.730789184570312, 0.10841751098632812, 5.732940673828125, 38.64398956298828, 23.51633071899414, -1.808187484741211, -45.233734130859375, 43.180023193359375, -15.344528198242188, -25.2149658203125, 21.66986083984375, 41.61993408203125, 26.519943237304688, 6.903900146484375, -43.8880615234375, 12.202499389648438, 20.783462524414062, -22.442108154296875, -16.16486358642578, 0.93603515625, 9.488649368286133, -2.442371368408203, 44.620574951171875, 6.9991455078125, 54.334625244140625, 67.37629699707031, -83.15179443359375, -6.3553466796875, -14.772987365722656, 27.18609619140625, -46.63005065917969, -10.229019165039062, -67.50064086914062, -7.225135803222656, 38.94305419921875, 36.83148193359375, 11.478515625, 115.0223388671875, 37.21844482421875, 67.72457885742188, -15.06414794921875, 72.7120361328125, 10.027915954589844, -18.696929931640625, 114.82516479492188, -19.545379638671875, 59.21173095703125, -27.35821533203125, 56.54461669921875, 56.44145202636719, -55.27978515625, 68.472900390625, -5.3125, 13.33840560913086, 27.532684326171875, -15.038887023925781, 51.84063720703125, 6.614501953125, 62.80531311035156, 34.84446716308594, -34.809730529785156, 40.788238525390625, -7.588415145874023, -11.577926635742188, 34.49317932128906, 111.1785888671875, -3.391864776611328, 5.1978912353515625, 73.47906494140625, 16.91409683227539, 22.948837280273438, 55.5859375, 68.60662841796875, -2.81646728515625, 87.77951049804688, 0.8665390014648438, -57.373931884765625, 23.873977661132812, 71.36570739746094, -74.68234252929688, 0.7503890991210938, 39.271026611328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000112.npy"}
{"epoch": 0.23455497382198953, "step": 113, "batch_size": 128, "mean": 24.613222122192383, "std": 41.04722595214844, "min": -93.24880981445312, "p10": -19.813564109802236, "median": 24.284622192382812, "p90": 73.49134216308593, "max": 169.2491455078125, "pos_frac": 0.7421875, "sample": [-6.767547607421875, 40.09178161621094, 54.9453125, -3.6091651916503906, 169.2491455078125, 41.96649169921875, 47.55743408203125, 14.575592041015625, 1.3643932342529297, 0.8411026000976562, -4.305328369140625, 3.4520645141601562, -0.361480712890625, 31.580718994140625, 21.231491088867188, 38.01997375488281, -5.9914703369140625, 38.18768310546875, 6.841575622558594, 23.147384643554688, 0.5556564331054688, 49.319488525390625, 99.80621337890625, 47.358734130859375, 47.3538818359375, -29.0941162109375, 12.31109619140625, 63.8201904296875, 2.235565185546875, 64.2139892578125, -14.046241760253906, -93.24880981445312, 32.591583251953125, -3.6208763122558594, -9.891693115234375, 71.669189453125, 34.32830810546875, -35.73802185058594, 54.89964294433594, -67.2440185546875, 123.48974609375, 34.0059814453125, -0.39017486572265625, 88.725341796875, -48.350311279296875, 48.19512939453125, 38.1129150390625, 41.12626647949219, -28.905181884765625, 27.345611572265625, 9.83892822265625, 165.6209716796875, 23.513877868652344, -16.06182861328125, 36.818756103515625, 39.330291748046875, 90.22262573242188, 44.24491882324219, 13.940826416015625, -7.070137023925781, 0.767486572265625, 5.200958251953125, 38.05693054199219, 35.59965515136719, 35.56005859375, 20.724761962890625, 24.50347900390625, -10.214202880859375, 18.066680908203125, 39.40657043457031, 5.4765777587890625, 59.354461669921875, 27.08428955078125, -33.355987548828125, 36.17137145996094, 38.52093505859375, 34.108489990234375, 25.71234130859375, 31.149871826171875, -13.815109252929688, -2.5584487915039062, 48.77734375, -38.98590087890625, -0.754425048828125, 96.4468994140625, 51.6546630859375, 94.45693969726562, 103.1414794921875, 3.808563232421875, 8.292755126953125, 74.4058837890625, 76.46686553955078, 0.954193115234375, 52.15702819824219, -36.242767333984375, 39.65496826171875, 66.17510986328125, -12.127567291259766, -17.512298583984375, 2.3669700622558594, 96.30307006835938, 30.55492401123047, -16.431976318359375, 2.9703826904296875, 16.51434326171875, -26.022293090820312, 16.44427490234375, -15.9508056640625, 7.8248291015625, 28.897293090820312, -25.183183670043945, 91.37265014648438, 47.809295654296875, 58.478607177734375, 73.09939575195312, 3.2019500732421875, -15.5821533203125, 5.946533203125, 38.09913635253906, 51.421905517578125, 34.7607421875, 24.065765380859375, -27.8382568359375, 64.13330078125, 19.925682067871094, 4.50921630859375, 27.747528076171875, -58.58910369873047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000113.npy"}
{"epoch": 0.23664921465968586, "step": 114, "batch_size": 128, "mean": 13.787893295288086, "std": 37.28483200073242, "min": -87.24896240234375, "p10": -33.01925201416015, "median": 13.91555404663086, "p90": 59.38171615600586, "max": 98.0318603515625, "pos_frac": 0.6796875, "sample": [11.30781364440918, 29.95013427734375, 28.4444580078125, 46.772159576416016, 11.731887817382812, 16.259689331054688, 29.14471435546875, -85.30889892578125, -24.51955795288086, 7.385498046875, -52.95849609375, 6.7776031494140625, 22.85869598388672, -15.235076904296875, -8.082695007324219, -8.720739364624023, -13.228912353515625, 97.26547241210938, 37.004520416259766, -29.335113525390625, -1.914306640625, 32.83213806152344, 0.98089599609375, 11.937187194824219, 14.075637817382812, 15.06610107421875, 17.583831787109375, -10.03717041015625, -28.950424194335938, 34.24273681640625, -27.78704833984375, 59.155548095703125, -9.892143249511719, 3.87847900390625, 2.2973403930664062, 14.012985229492188, 65.03024291992188, -55.0799560546875, 62.1085205078125, -3.020233154296875, 2.13720703125, -80.37335205078125, 14.902938842773438, 59.268348693847656, 43.06163024902344, -49.17161560058594, -43.73199462890625, 3.2342071533203125, 20.71893310546875, 94.8162841796875, -87.24896240234375, 49.93278503417969, 50.237525939941406, 50.9229736328125, 19.432159423828125, 0.52105712890625, -2.1519603729248047, -9.579109191894531, -36.95086669921875, 38.82310485839844, 15.088237762451172, -38.162109375, 9.411785125732422, 11.04443359375, 21.55084228515625, -14.616378784179688, -11.55419921875, 64.70257568359375, 26.563690185546875, -7.649932861328125, 5.470430374145508, -18.53533935546875, 37.981666564941406, 28.643157958984375, 16.493331909179688, 44.06884765625, -5.1791229248046875, 58.9371337890625, 59.646240234375, -42.784149169921875, -20.662750244140625, 49.91584777832031, -7.14013671875, 50.44317626953125, 98.0318603515625, 54.8045654296875, 21.2686767578125, 4.908149719238281, 0.0, 72.91912841796875, 13.818122863769531, -17.20832061767578, 20.857330322265625, 62.346588134765625, 24.669214248657227, 6.1859130859375, 28.99755859375, 62.33734130859375, 4.1766204833984375, 55.09405517578125, 9.820762634277344, 6.929069519042969, 30.454986572265625, 33.819580078125, 56.247589111328125, 33.039031982421875, 22.6806640625, 46.88031005859375, 67.55433654785156, -74.01821899414062, 73.27902221679688, -39.82403564453125, -0.7697105407714844, -11.922027587890625, 18.357620239257812, -61.284820556640625, -31.334274291992188, 0.0, 90.49288940429688, -24.18304443359375, 52.29743957519531, 0.558349609375, 5.58642578125, 57.3385009765625, 19.366546630859375, 25.304916381835938, 10.27638053894043, 58.183258056640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000114.npy"}
{"epoch": 0.2387434554973822, "step": 115, "batch_size": 128, "mean": 13.970979690551758, "std": 40.707950592041016, "min": -85.06787109375, "p10": -28.368367004394532, "median": 9.686473846435547, "p90": 69.68814392089844, "max": 152.7454833984375, "pos_frac": 0.6171875, "sample": [110.80557250976562, -28.184066772460938, 44.30183410644531, -13.667789459228516, -6.49957275390625, -3.0855331420898438, 3.643707275390625, 14.9207763671875, -4.1675872802734375, -31.093032836914062, 47.20573425292969, 25.814193725585938, 46.021728515625, 84.41476440429688, 21.51879119873047, 139.18914794921875, -27.357025146484375, 30.209716796875, 5.672607421875, 21.5860595703125, -9.894477844238281, -45.851654052734375, -12.0123291015625, 5.223968505859375, -2.13958740234375, -6.645965576171875, -13.81793212890625, 92.66073608398438, 28.69171142578125, -15.576156616210938, 7.888725280761719, -8.1590576171875, -6.7440185546875, -31.395217895507812, -16.571502685546875, 20.860626220703125, -7.504119873046875, -37.169464111328125, -19.512710571289062, 120.10882568359375, -39.67755126953125, 15.720794677734375, 8.736770629882812, 9.1661376953125, -35.11956787109375, 12.4217529296875, -13.983642578125, -6.512115478515625, 52.26820373535156, 83.158935546875, 0.07470893859863281, 10.6654052734375, 48.74114990234375, -6.572257995605469, 10.206809997558594, 11.214239120483398, 44.05973815917969, -10.2747802734375, -85.06787109375, 11.184661865234375, -17.380126953125, 34.67845916748047, 7.507377624511719, 62.67547607421875, -14.18255615234375, 13.828889846801758, -13.428260803222656, 28.857513427734375, 11.828880310058594, 54.055999755859375, 53.73046875, 31.63232421875, -6.887046813964844, -0.3748321533203125, 25.24420166015625, 25.252113342285156, 69.39419555664062, 44.2454833984375, 71.23634338378906, -73.40628051757812, 87.4653091430664, 28.351058959960938, 25.92112922668457, 16.996984481811523, 28.3116455078125, 72.0615234375, 152.7454833984375, 5.45355224609375, 10.587390899658203, 70.3740234375, -29.064903259277344, 20.57152557373047, -78.23565673828125, 0.5304718017578125, 41.850921630859375, 18.046890258789062, -13.772903442382812, 0.19635772705078125, 36.174652099609375, -27.514114379882812, -44.80523681640625, -23.339752197265625, 50.250946044921875, -14.798828125, 24.27789306640625, 15.2479248046875, -3.3454437255859375, 23.5030517578125, -73.5753173828125, -10.090423583984375, 59.90032958984375, 29.290924072265625, 102.278564453125, 11.62359619140625, -17.671112060546875, 43.892913818359375, -28.79840087890625, -27.624847412109375, 6.8743133544921875, 3.9103622436523438, -17.965057373046875, 87.95382690429688, 21.779571533203125, 2.6141815185546875, 8.227630615234375, 12.601654052734375, -21.856216430664062, 48.266357421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000115.npy"}
{"epoch": 0.24083769633507854, "step": 116, "batch_size": 128, "mean": 18.034038543701172, "std": 36.49895477294922, "min": -124.04241943359375, "p10": -19.72778739929199, "median": 13.708030700683594, "p90": 64.59658279418944, "max": 114.72537231445312, "pos_frac": 0.6796875, "sample": [-0.630096435546875, -20.16586685180664, 29.738739013671875, -2.9023895263671875, -9.7606201171875, 79.75625610351562, 13.331977844238281, 17.299179077148438, 50.03649139404297, 50.790924072265625, -16.502639770507812, 13.834945678710938, -39.810150146484375, 40.41680908203125, -124.04241943359375, 16.20538330078125, 0.6433792114257812, 0.0, -1.4546241760253906, 90.31906127929688, -7.0884552001953125, 26.80911636352539, 10.884185791015625, 2.93316650390625, 13.58111572265625, 57.6016845703125, 36.605255126953125, 0.0, 73.17962646484375, -9.176437377929688, 47.55207824707031, 49.96690368652344, 16.022689819335938, 27.77618408203125, 36.811798095703125, 48.51361083984375, 23.170303344726562, 20.34637451171875, -17.912216186523438, 1.6723785400390625, -67.57470703125, -6.182254791259766, 63.892723083496094, 41.535552978515625, 73.69654846191406, -19.5400390625, -10.35345458984375, 58.9261474609375, -5.238792419433594, 6.130645751953125, 114.72537231445312, 68.26797485351562, -0.5584220886230469, 4.844970703125, -37.11491394042969, 63.355926513671875, -2.0963382720947266, 19.451770782470703, -7.436187744140625, 15.826904296875, 22.70123291015625, 35.85162353515625, 90.71429443359375, 39.604644775390625, 85.649169921875, 33.14398193359375, 31.4921875, 8.548583984375, -5.699436187744141, 31.99652099609375, 24.9913330078125, -20.704673767089844, 3.590484619140625, -34.43505859375, 50.71995544433594, -4.109004974365234, 37.24298095703125, 70.41694641113281, 2.73529052734375, 8.555656433105469, 11.106231689453125, -19.254501342773438, -3.8792572021484375, 19.059425354003906, 45.69195556640625, -42.629127502441406, 21.660171508789062, 58.42805480957031, 94.71023559570312, 6.14556884765625, 12.669425964355469, 102.9451904296875, -75.16609191894531, 40.344757080078125, 8.240890502929688, 80.67149353027344, 49.17890930175781, 18.381263732910156, -8.499908447265625, -0.8189506530761719, 12.67791748046875, 19.468997955322266, -7.67926025390625, 48.410797119140625, 31.517005920410156, -34.823272705078125, 4.126033782958984, 41.2635498046875, 14.143325805664062, 56.513465881347656, 13.20281982421875, -23.257431030273438, -17.693801879882812, 12.832405090332031, 10.650917053222656, 51.757537841796875, -5.3906097412109375, -6.997215270996094, 19.0048828125, 25.212020874023438, 66.23892211914062, -39.148681640625, 9.8363037109375, 60.028289794921875, -28.045867919921875, 7.388195037841797, -17.19769287109375, 31.4117431640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000116.npy"}
{"epoch": 0.24293193717277486, "step": 117, "batch_size": 128, "mean": 20.949085235595703, "std": 36.32061004638672, "min": -68.98538208007812, "p10": -24.455114746093745, "median": 20.253822326660156, "p90": 69.33227005004882, "max": 107.08587646484375, "pos_frac": 0.703125, "sample": [7.533573150634766, 30.730758666992188, 21.493179321289062, 38.158233642578125, 86.88360595703125, 28.471803665161133, 15.304540634155273, -7.4031982421875, 21.852859497070312, 14.352127075195312, 82.585205078125, -10.3804931640625, 77.830078125, 104.57211303710938, 0.366973876953125, 97.26629638671875, 9.14739990234375, 45.947601318359375, 40.71929931640625, 3.5994415283203125, -44.505950927734375, 22.58038330078125, -16.91663360595703, 45.01702880859375, 13.015918731689453, 24.348968505859375, -15.320571899414062, 90.3641357421875, 69.14593505859375, 15.059783935546875, 16.069473266601562, -16.325927734375, 17.437179565429688, 29.97113037109375, 6.7263031005859375, -1.4768524169921875, 35.8232421875, -0.3500518798828125, 2.6447296142578125, -13.02728271484375, 54.824127197265625, -31.84052276611328, -0.3451881408691406, 0.7498340606689453, -46.57403564453125, 91.89346313476562, 47.512603759765625, 27.02276611328125, -10.278144836425781, -12.04229736328125, 49.21905517578125, -51.487762451171875, -48.39338684082031, -5.7701416015625, 2.7396011352539062, -27.039932250976562, 20.552078247070312, -52.22314453125, -48.14042663574219, -14.694061279296875, 12.02751350402832, 5.7928924560546875, 19.95556640625, -23.347335815429688, -0.1099853515625, -27.757186889648438, 46.02473449707031, -33.753570556640625, 90.91351318359375, -1.1324005126953125, 40.048614501953125, 50.5701904296875, 14.238208770751953, 40.2098388671875, -0.0975189208984375, -47.96094512939453, -14.49755859375, 36.68309020996094, 19.267868041992188, 27.780853271484375, 1.4680213928222656, 11.995101928710938, 107.08587646484375, -68.98538208007812, -38.883575439453125, 5.9554595947265625, 59.4765625, 40.37732696533203, 28.436798095703125, -2.913473129272461, 32.699859619140625, 25.587646484375, 9.680862426757812, 58.330535888671875, -9.7186279296875, 33.07252502441406, 14.65484619140625, 46.171875, 29.34754180908203, 36.380157470703125, 0.0, 60.814849853515625, -11.245330810546875, -1.6381683349609375, 40.3504638671875, 77.59454345703125, 41.873023986816406, 73.97134399414062, 53.6085205078125, 49.21826171875, 26.665985107421875, 54.075531005859375, 88.11392211914062, 32.300872802734375, 65.4593505859375, 32.52735137939453, 52.06396484375, -5.146095275878906, 5.1420745849609375, 29.76519775390625, 67.99221801757812, 69.76705169677734, 5.533802032470703, 36.85939025878906, -3.8861541748046875, 42.96392822265625, 57.66400146484375, 28.9996337890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000117.npy"}
{"epoch": 0.2450261780104712, "step": 118, "batch_size": 128, "mean": 23.0421199798584, "std": 44.941795349121094, "min": -75.08258056640625, "p10": -33.84593658447265, "median": 14.735411643981934, "p90": 81.48073425292968, "max": 140.598876953125, "pos_frac": 0.671875, "sample": [60.4266357421875, -14.180419921875, 45.18507385253906, -20.51123046875, 78.7552490234375, 47.04888916015625, 40.63859176635742, 56.99163818359375, 54.717010498046875, 86.46560668945312, 93.14000701904297, -19.993270874023438, 31.41693115234375, 5.2400360107421875, 35.4215087890625, -5.052276611328125, -15.357879638671875, -18.269344329833984, -53.60980224609375, 11.961067199707031, 33.89726638793945, -10.270118713378906, 45.895263671875, 4.601226806640625, -9.196701049804688, 23.672210693359375, -30.95574951171875, 6.5958404541015625, 59.05702209472656, 74.443603515625, 8.48095703125, -24.473480224609375, 52.75474548339844, 50.2388916015625, -75.08258056640625, 1.3108139038085938, -11.989898681640625, 13.367195129394531, 50.788612365722656, 30.310714721679688, 19.58428955078125, 74.3197021484375, 97.17195892333984, 3.3837890625, 67.60543823242188, 64.2989501953125, 59.5609130859375, 121.42047119140625, 25.922821044921875, -34.342193603515625, -17.046287536621094, 73.33428955078125, 21.974132537841797, 79.68539428710938, 6.270660400390625, 47.943695068359375, 93.00086975097656, 35.038421630859375, -72.09152221679688, -27.739913940429688, -54.62542724609375, 14.06195068359375, 29.077774047851562, 97.97457885742188, 67.46791076660156, 39.19219970703125, 21.736968994140625, 11.303863525390625, -2.159423828125, 59.033294677734375, 67.0777587890625, -1.6006717681884766, -45.47601318359375, -33.63325500488281, 85.09420776367188, -13.512115478515625, -37.886444091796875, 4.75445556640625, 14.129240036010742, 5.10028076171875, -4.986572265625, 39.29728698730469, -26.971893310546875, 70.538818359375, 12.855789184570312, 20.191574096679688, 1.341033935546875, -42.343780517578125, -18.510955810546875, -41.04307556152344, 115.97232055664062, -6.259033203125, 14.307035446166992, 81.0714111328125, 2.0409183502197266, -17.185379028320312, 39.94026184082031, 4.3434295654296875, -39.84393310546875, 112.10693359375, 15.163787841796875, -16.705154418945312, 70.88449096679688, 62.85333251953125, -54.51963806152344, 91.59939575195312, 42.494171142578125, 75.9830322265625, -5.707118988037109, 0.0, -22.05889892578125, 20.55211639404297, 82.43582153320312, -1.2889404296875, -37.86279296875, 126.82669067382812, 47.732696533203125, 65.31982421875, -21.469818115234375, -43.9429931640625, 70.06388854980469, 24.46368408203125, 1.1087188720703125, 12.578720092773438, 0.0, 7.0584716796875, 18.079986572265625, 140.598876953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000118.npy"}
{"epoch": 0.24712041884816754, "step": 119, "batch_size": 128, "mean": 17.037250518798828, "std": 44.56984329223633, "min": -87.29315185546875, "p10": -40.418539428710936, "median": 13.904313087463379, "p90": 77.0254425048828, "max": 140.3831787109375, "pos_frac": 0.6328125, "sample": [-73.263671875, -52.56996154785156, 50.790679931640625, 48.90684509277344, -9.481643676757812, 52.21563720703125, 42.500244140625, 11.039031982421875, -13.06024169921875, 35.40142822265625, 27.034072875976562, -14.6051025390625, -5.313507080078125, -30.05889892578125, 126.885986328125, 29.23998260498047, 97.54144287109375, 25.298095703125, 133.58636474609375, 21.016510009765625, 45.37530517578125, -19.36456298828125, 27.444244384765625, -73.1776123046875, 32.05702209472656, 64.78707885742188, 6.57525634765625, -42.93304443359375, 112.25344848632812, 47.082916259765625, 82.99087524414062, -8.693954467773438, 6.1075439453125, 6.9775543212890625, -58.4464111328125, -29.340805053710938, 38.52861022949219, 18.511962890625, 8.152557373046875, -0.6309776306152344, -7.74786376953125, 47.27435302734375, 14.840669631958008, -31.92083740234375, -39.888641357421875, 75.93350219726562, -22.197372436523438, -9.711624145507812, 60.185791015625, -10.52984619140625, 9.765899658203125, 15.560195922851562, 103.60458374023438, 63.16473388671875, 38.2352294921875, 29.264251708984375, 29.23199462890625, 18.050048828125, 48.7547607421875, -8.594226837158203, -12.304977416992188, 42.687408447265625, 31.710479736328125, 28.85113525390625, -3.0430908203125, -20.37933349609375, 53.4449462890625, -29.544174194335938, 47.807281494140625, -50.26610565185547, -1.3699951171875, -6.90557861328125, 21.316818237304688, 12.769393920898438, 27.527137756347656, 12.96795654296875, 31.40386962890625, -3.4679527282714844, 24.96844482421875, 56.017234802246094, 23.713638305664062, 0.0, 80.85546875, -23.44573974609375, -20.232330322265625, 52.60992431640625, -28.988548278808594, 9.273590087890625, 37.00514221191406, 8.569198608398438, 41.317962646484375, -42.908355712890625, 33.29869079589844, 10.049530029296875, 33.97479248046875, -13.375076293945312, 40.44635009765625, 65.5147705078125, -21.5689697265625, 24.0640869140625, 91.03323364257812, 2.6353931427001953, -12.704696655273438, -54.10614013671875, 79.57330322265625, 17.311508178710938, 32.18720245361328, -47.333984375, 11.9866943359375, 5.9942626953125, 70.81082153320312, 117.07595825195312, 19.30902099609375, -87.29315185546875, 36.84820556640625, -46.590576171875, 2.1860809326171875, -41.65496826171875, 10.8267822265625, -85.97683715820312, -13.943328857421875, 140.3831787109375, -2.2828903198242188, 101.281005859375, 87.50376892089844, 11.487350463867188, -5.72784423828125, -25.052276611328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000119.npy"}
{"epoch": 0.24921465968586387, "step": 120, "batch_size": 128, "mean": 19.377700805664062, "std": 44.62849426269531, "min": -92.6280517578125, "p10": -29.374630737304685, "median": 14.891792297363281, "p90": 72.99655151367188, "max": 150.046142578125, "pos_frac": 0.6484375, "sample": [-25.169891357421875, 58.517181396484375, -6.599922180175781, 39.10798645019531, 114.26885986328125, 1.1614990234375, -79.86651611328125, 20.1600341796875, 6.42218017578125, -9.532087326049805, 14.514984130859375, 5.025020599365234, 1.4878158569335938, 13.245330810546875, -16.474716186523438, -11.21389389038086, 66.82699584960938, 33.14012145996094, 59.80364990234375, 91.73968505859375, -25.485870361328125, 20.996017456054688, 51.135719299316406, 62.647117614746094, 59.388458251953125, -65.53842163085938, -40.45616149902344, -28.788711547851562, 62.258636474609375, -4.25140380859375, -53.525665283203125, 29.6968994140625, -14.060417175292969, 68.60212707519531, 62.145843505859375, -68.04194641113281, -1.9950714111328125, 38.20288848876953, 44.737457275390625, 15.887985229492188, -28.3924560546875, 103.02987670898438, 150.046142578125, 45.06475830078125, 102.2437744140625, 45.665069580078125, 44.971527099609375, 47.40205383300781, -17.0423583984375, 77.39079284667969, 72.43304443359375, 15.268600463867188, -46.954219818115234, 6.719999313354492, -10.116386413574219, 24.214752197265625, -28.221351623535156, 84.15679931640625, -13.790969848632812, 60.5458984375, 39.42041778564453, 74.3114013671875, 3.8280105590820312, -44.001190185546875, 14.15283203125, 21.842803955078125, -7.379390716552734, -30.741775512695312, 7.4670867919921875, 18.45316505432129, 16.41241455078125, -20.260528564453125, 63.778778076171875, 39.810020446777344, -8.284515380859375, -1.832855224609375, 6.636604309082031, 23.9749755859375, 5.8913116455078125, 20.9857177734375, -7.376930236816406, 34.2794189453125, 22.287078857421875, -22.550186157226562, 49.671142578125, 0.030511856079101562, 41.06626892089844, -8.743309020996094, 85.55020141601562, 63.62512969970703, 9.601776123046875, 51.739715576171875, -16.731807708740234, 43.103515625, -35.233795166015625, 143.6343994140625, 23.62976837158203, -8.86676025390625, -71.39523315429688, -0.83709716796875, 0.0, -17.07825469970703, -39.725677490234375, -22.3668212890625, -18.333621978759766, 48.078369140625, 109.17279052734375, 43.0328369140625, -7.122581481933594, 0.0, 5.764556884765625, 51.49617004394531, 7.620758056640625, 39.9833984375, 39.46075439453125, 34.48455810546875, 119.936279296875, -92.6280517578125, 83.66348266601562, 43.807647705078125, -63.057098388671875, 57.57476806640625, 37.830108642578125, 8.065582275390625, 3.3302230834960938, 12.79541015625, -8.095138549804688, 36.95106506347656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000120.npy"}
{"epoch": 0.2513089005235602, "step": 121, "batch_size": 128, "mean": 14.089475631713867, "std": 42.248809814453125, "min": -92.36737060546875, "p10": -29.846098327636714, "median": 6.4888458251953125, "p90": 65.98844680786132, "max": 169.73748779296875, "pos_frac": 0.609375, "sample": [-6.841514587402344, 1.850250244140625, -1.4250907897949219, 1.4328155517578125, -72.58294677734375, 57.474853515625, 40.972206115722656, -55.38116455078125, 0.88531494140625, -11.264656066894531, -8.03200912475586, 99.94935607910156, -0.264678955078125, 0.0, 29.10434341430664, 8.509326934814453, 27.724510192871094, 41.979530334472656, 67.59518432617188, 28.32061767578125, -32.899658203125, 28.069580078125, 6.762847900390625, -48.799644470214844, 30.008697509765625, -9.24090576171875, -7.6645660400390625, 66.5533676147461, 40.74803161621094, -16.8572998046875, 42.15673828125, -0.3743438720703125, 34.077484130859375, -16.808273315429688, -13.150634765625, 48.76179504394531, -41.32819366455078, 42.150352478027344, 48.64642333984375, 20.2913818359375, 57.571441650390625, -7.0607757568359375, 65.55584716796875, 25.39208984375, 0.63323974609375, 37.406280517578125, 10.654876708984375, -16.253700256347656, 4.316925048828125, 13.63916015625, -38.97425842285156, 11.99462890625, 8.340032577514648, -24.32135009765625, 16.605941772460938, -10.124664306640625, 103.78515625, 1.4395751953125, -63.868804931640625, 1.3795013427734375, 66.85397338867188, -21.112159729003906, 38.52923583984375, 6.21484375, 17.7847900390625, 129.55670166015625, 15.515323638916016, -8.022369384765625, -22.77520751953125, 1.463043212890625, 52.46490478515625, -1.366617202758789, -92.36737060546875, 0.0, 34.87574005126953, -5.7135009765625, -3.692413330078125, 39.081146240234375, 65.746337890625, -12.844039916992188, 5.65838623046875, 50.34166717529297, -3.360942840576172, 7.5614013671875, -0.16864013671875, 29.11065673828125, 1.74322509765625, 48.25377655029297, -80.25357055664062, 78.817138671875, 36.13883972167969, 78.29994201660156, 13.526565551757812, -27.813011169433594, 118.78390502929688, -13.385459899902344, 65.37533569335938, 13.19753646850586, 49.537437438964844, 13.668121337890625, -11.072319030761719, 5.616035461425781, 81.55966186523438, -16.289031982421875, -25.32166290283203, 39.40155029296875, 12.61471176147461, 37.78767776489258, -39.229248046875, -50.16221618652344, -15.38491439819336, 122.15023803710938, 0.2981758117675781, 9.128036499023438, -9.221954345703125, 43.648887634277344, -51.607666015625, -17.079994201660156, 22.78179931640625, 169.73748779296875, 14.769020080566406, 4.8845977783203125, 93.57504272460938, -15.839950561523438, -0.5782756805419922, -28.537429809570312, -58.184112548828125, 29.563430786132812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000121.npy"}
{"epoch": 0.2534031413612565, "step": 122, "batch_size": 128, "mean": 19.90597152709961, "std": 48.32355880737305, "min": -107.04896545410156, "p10": -43.15950241088867, "median": 16.151885986328125, "p90": 83.87372131347657, "max": 147.59957885742188, "pos_frac": 0.671875, "sample": [10.237030029296875, -28.316741943359375, 51.48136901855469, -34.085540771484375, 22.47808837890625, -4.5032806396484375, 55.983856201171875, -22.709922790527344, 23.306320190429688, 70.0091552734375, -40.10789489746094, 21.068939208984375, 80.39456176757812, -41.39617919921875, -2.041290283203125, 8.116233825683594, -0.35176849365234375, 66.5718994140625, 62.53588104248047, 67.36981201171875, 23.237136840820312, 91.91393280029297, 17.16796875, 12.191314697265625, 21.79407501220703, -12.386993408203125, 47.3177490234375, -51.29145812988281, 9.283203125, 16.896820068359375, 143.52996826171875, -2.759124755859375, -25.592544555664062, 54.649017333984375, 26.932281494140625, 24.13458251953125, 49.092857360839844, -64.59860229492188, 20.525665283203125, 44.63819885253906, -13.119964599609375, 64.162841796875, -29.37310791015625, -89.17034912109375, 35.67169189453125, -52.04441833496094, 16.04742431640625, 6.745796203613281, 11.12249755859375, 84.43014526367188, 93.8128662109375, 41.138648986816406, 7.558380126953125, -6.08642578125, 45.54130554199219, 51.780059814453125, -3.32720947265625, 0.09466552734375, 9.324180603027344, 47.05253601074219, 58.762725830078125, -43.07426452636719, 79.33706665039062, 134.76812744140625, -55.613800048828125, 43.928497314453125, 27.1268310546875, 0.0, -62.47550964355469, 29.765411376953125, 13.761749267578125, 67.9229736328125, 16.25634765625, 115.93804931640625, -14.442840576171875, 48.67583465576172, -0.7180881500244141, 0.6334609985351562, -79.01571655273438, 26.69464111328125, -6.571922302246094, 97.128173828125, 105.5574951171875, 38.97904968261719, 2.9891624450683594, 76.7701416015625, 52.900787353515625, 95.0791015625, 52.90336608886719, 13.415390014648438, 27.800262451171875, -1.46221923828125, 7.7747802734375, 75.04693603515625, 1.2426395416259766, 83.63525390625, 23.30670166015625, -70.31317138671875, 65.57416534423828, -107.04896545410156, -4.66569709777832, -87.71357727050781, 21.16114044189453, 12.837223052978516, 147.59957885742188, -41.8992919921875, 10.625885009765625, -52.23985290527344, 16.547943115234375, 86.3016357421875, 61.297393798828125, -6.5404052734375, 41.88374328613281, -0.473876953125, 90.43431091308594, 20.24371337890625, 4.731758117675781, -22.585254669189453, -10.54833984375, 6.8528289794921875, -2.3287734985351562, -43.35839080810547, 9.078727722167969, -46.720733642578125, -2.9922828674316406, 58.28257751464844, 95.43870544433594, 9.6968994140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000122.npy"}
{"epoch": 0.2554973821989529, "step": 123, "batch_size": 128, "mean": 25.76892852783203, "std": 44.812904357910156, "min": -146.27304077148438, "p10": -20.4498046875, "median": 21.48236083984375, "p90": 87.46802520751953, "max": 153.72235107421875, "pos_frac": 0.7265625, "sample": [56.59495544433594, -13.767486572265625, 104.55323791503906, -20.18865966796875, 23.77093505859375, 13.37872314453125, 41.00982666015625, 69.26327514648438, -17.50909423828125, 18.961700439453125, 116.8294677734375, 88.04795837402344, 36.330078125, 107.92791748046875, -3.778575897216797, 30.812591552734375, 36.59599304199219, 9.20416259765625, 66.3509521484375, 2.751983642578125, 26.01959228515625, 19.761932373046875, 2.5534305572509766, 83.37513732910156, -6.9294586181640625, 37.01837158203125, 25.751049041748047, 8.24951171875, 105.167236328125, 31.989700317382812, -25.91192626953125, 82.15115356445312, 36.08003234863281, 4.315216064453125, 20.573822021484375, -2.1974334716796875, 31.17364501953125, -146.27304077148438, -3.30731201171875, 82.60845947265625, -51.023193359375, 7.831756591796875, 34.69810485839844, 15.731164932250977, 69.25238037109375, 100.7098388671875, 99.7119140625, 14.193527221679688, 87.219482421875, 36.76919937133789, 55.966796875, -19.085525512695312, -8.245079040527344, -12.946624755859375, 20.42791748046875, 137.4228515625, 21.93853759765625, 57.562255859375, -39.869598388671875, 22.854034423828125, 38.749114990234375, 7.19390869140625, 35.217742919921875, 26.47857666015625, 25.5582275390625, 22.411376953125, -19.04470443725586, 4.46612548828125, 23.31573486328125, -3.963653564453125, 1.8051910400390625, 45.60955810546875, 92.40151977539062, -17.360916137695312, 56.4266357421875, 51.40545654296875, -6.76275634765625, 17.291290283203125, -10.41619873046875, 86.32342529296875, 11.847450256347656, -26.921066284179688, 35.837371826171875, 58.83934020996094, 22.179229736328125, 7.417457580566406, 39.558746337890625, 75.19256591796875, 58.335968017578125, 91.81976318359375, 0.0, -31.94629669189453, -10.872379302978516, 0.48089599609375, 17.852821350097656, 0.0614013671875, 4.8546600341796875, 13.5650634765625, 10.665115356445312, -9.8785400390625, 49.662994384765625, 98.61233520507812, -10.140382766723633, 10.364532470703125, 67.66815185546875, -6.523567199707031, 40.714874267578125, 47.09800720214844, 45.141998291015625, 13.368106842041016, -115.5177001953125, 39.62559509277344, 21.02618408203125, -26.22076416015625, -36.658416748046875, 53.85888671875, 69.89448547363281, -26.122802734375, -2.65753173828125, -21.05914306640625, 153.72235107421875, 25.20294189453125, -28.4193115234375, 0.0, 111.02008056640625, -36.6693115234375, 15.894760131835938, 69.11138916015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000123.npy"}
{"epoch": 0.25759162303664923, "step": 124, "batch_size": 128, "mean": 22.159271240234375, "std": 42.20515060424805, "min": -116.9705810546875, "p10": -25.301383209228515, "median": 20.794967651367188, "p90": 75.07108154296876, "max": 128.62289428710938, "pos_frac": 0.7109375, "sample": [-0.790740966796875, 31.86590576171875, -98.95965576171875, 8.23004150390625, -19.510704040527344, 53.408233642578125, 97.46038818359375, 86.11178588867188, -1.716278076171875, 57.355316162109375, -15.231559753417969, 39.145843505859375, 41.70904541015625, -32.89378356933594, -3.005645751953125, -51.55750274658203, 9.93267822265625, 40.320220947265625, 15.65338134765625, 104.02908325195312, 12.838165283203125, 91.70626831054688, 30.0688419342041, 62.9864501953125, 26.0540771484375, 81.25616455078125, -8.894424438476562, -21.122314453125, -12.74847412109375, 5.7173614501953125, 10.251419067382812, 55.00129699707031, 12.82847785949707, 5.56494140625, 57.02473449707031, 62.35658264160156, 52.72926330566406, 89.5574951171875, 46.696624755859375, 71.93142700195312, 37.23272705078125, -25.28057098388672, 4.533477783203125, 68.89949035644531, 46.079689025878906, 0.0, -5.426992416381836, 2.1475753784179688, 37.692283630371094, 25.24346923828125, 32.28253173828125, 44.18377685546875, 3.5945205688476562, -8.95843505859375, 75.02279663085938, -1.2374420166015625, -14.35211181640625, 13.203216552734375, -7.43536376953125, 75.18374633789062, 21.421463012695312, 105.16407775878906, -8.991701126098633, -38.84014892578125, 57.1031494140625, 108.09347534179688, 44.947357177734375, -31.32647705078125, 58.327301025390625, 3.5029144287109375, 100.00698852539062, 7.81597900390625, 21.1513671875, -25.349945068359375, -2.3916015625, -6.526744842529297, 32.240997314453125, 10.12091064453125, 35.43193054199219, -33.64387512207031, -116.9705810546875, -56.123046875, 43.34087371826172, 17.577407836914062, 12.435150146484375, 104.811767578125, -24.4049072265625, -36.57328796386719, 128.62289428710938, 58.34989547729492, 21.756683349609375, 45.363555908203125, 54.70710754394531, 67.44500732421875, 85.39804077148438, 73.92428588867188, 21.751434326171875, -16.37744140625, 29.5477294921875, 28.05108642578125, 28.597518920898438, 3.7663497924804688, -21.270248413085938, 9.818115234375, 15.70391845703125, 20.438568115234375, 66.08763122558594, -54.6513671875, 58.30375671386719, -53.036376953125, -57.354400634765625, 57.98486328125, 9.199142456054688, 34.661277770996094, 47.452056884765625, 40.697113037109375, 8.796623229980469, 7.9525146484375, 22.006729125976562, -17.244659423828125, -10.107986450195312, 58.09086608886719, 10.295440673828125, 17.495220184326172, 29.854949951171875, 22.978363037109375, 3.8882522583007812, -20.877464294433594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000124.npy"}
{"epoch": 0.25968586387434556, "step": 125, "batch_size": 128, "mean": 19.992530822753906, "std": 52.583797454833984, "min": -106.45010375976562, "p10": -42.629557800292964, "median": 14.976966857910156, "p90": 88.93906173706054, "max": 158.32275390625, "pos_frac": 0.625, "sample": [31.51568603515625, -26.73714828491211, -101.80492401123047, 61.530029296875, 41.35546875, -4.891897201538086, -8.63818359375, 88.77893829345703, -81.994140625, 32.846954345703125, 75.0262451171875, 10.501964569091797, -66.17803955078125, 25.9814453125, 48.7706298828125, -79.8553466796875, -66.0997314453125, 31.40386962890625, 1.03656005859375, 12.810245513916016, -13.445159912109375, 59.938201904296875, 25.039031982421875, -32.85342788696289, 37.007659912109375, 113.34744262695312, -13.481414794921875, 80.89532470703125, 45.46099853515625, 134.02639770507812, 8.05836296081543, 51.9522705078125, 26.107118606567383, 51.08308410644531, 45.101226806640625, -17.150726318359375, 33.77459716796875, 81.158935546875, 19.102615356445312, -8.150917053222656, -18.20306396484375, 12.802276611328125, 34.55511474609375, -26.73255157470703, 64.27554321289062, 158.32275390625, 36.474517822265625, 10.516708374023438, 1.844512939453125, 14.31005859375, 126.934326171875, 38.32514953613281, 3.972137451171875, 0.0, 3.0198326110839844, -64.03352355957031, 5.784088134765625, 79.53244018554688, -29.48077392578125, -12.154052734375, 91.2691650390625, 26.567901611328125, 37.665283203125, 35.522216796875, -1.7918071746826172, -63.431976318359375, -20.249710083007812, 34.08429718017578, -51.04109573364258, -43.1689453125, -23.900482177734375, -19.819969177246094, 3.92486572265625, 28.6650390625, 27.5380859375, -11.03558349609375, 5.560760498046875, 46.65528869628906, 63.551605224609375, 71.81106567382812, 112.48712158203125, 120.8355712890625, -3.5188465118408203, -106.45010375976562, 89.31268310546875, -30.878799438476562, 20.318893432617188, -4.8873748779296875, 46.59650802612305, 54.010467529296875, 65.86173248291016, 1.008453369140625, 82.95846557617188, -93.987548828125, -25.77678680419922, 107.93438720703125, 68.292724609375, 38.2454833984375, -1.535858154296875, 94.74069213867188, -36.693450927734375, 53.95062255859375, -18.20562744140625, -0.8560791015625, -22.378082275390625, 158.28759765625, 10.427017211914062, -53.776702880859375, -17.782684326171875, 81.06597900390625, 3.0421142578125, -0.9715518951416016, 39.25593566894531, 36.46611022949219, 80.38336181640625, 76.71308135986328, 15.643875122070312, 105.85736083984375, -16.600906372070312, -30.855682373046875, -42.39839172363281, -2.58526611328125, -47.57232666015625, 17.032989501953125, 26.68145751953125, 107.3441162109375, -14.38232421875, -14.38812255859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000125.npy"}
{"epoch": 0.2617801047120419, "step": 126, "batch_size": 128, "mean": 22.523632049560547, "std": 46.87846374511719, "min": -123.755126953125, "p10": -41.685028076171875, "median": 20.17633056640625, "p90": 86.3341583251953, "max": 146.27227783203125, "pos_frac": 0.703125, "sample": [43.7313232421875, 70.092529296875, 39.69274139404297, 33.24263000488281, 100.2239990234375, 2.481536865234375, 48.5355224609375, -48.210540771484375, 32.122467041015625, 51.315704345703125, -42.137786865234375, -22.713134765625, -25.229598999023438, -34.900482177734375, 70.42465209960938, -15.183841705322266, -45.7674560546875, 61.27069091796875, -3.5987625122070312, 14.177749633789062, 8.031028747558594, 30.940399169921875, -3.842132568359375, 1.22015380859375, 71.10049438476562, 85.4901123046875, -3.3358917236328125, 80.9818115234375, 75.4548568725586, 112.9512939453125, 26.502361297607422, 18.03448486328125, 43.5950927734375, 68.74114990234375, 12.583953857421875, 23.16827392578125, -41.55342102050781, 26.1361083984375, -47.803558349609375, 1.5244216918945312, 14.118843078613281, 135.64474487304688, -9.53302001953125, -0.2472858428955078, 50.371429443359375, 146.27227783203125, 24.891437530517578, 18.997879028320312, 41.39024353027344, 108.680419921875, 57.37107849121094, 9.859130859375, 91.70613098144531, 36.4390869140625, 46.259735107421875, 89.92425537109375, 102.416015625, 33.595550537109375, -57.739410400390625, 19.32342529296875, -8.869071960449219, 0.8706512451171875, 21.02923583984375, -50.58978271484375, 6.932706832885742, 9.1265869140625, -66.61834716796875, 31.62457275390625, 51.92371368408203, 46.56097412109375, 40.53253173828125, -7.968456268310547, 23.74493408203125, -40.84869384765625, -4.87396240234375, -43.96723175048828, 57.070343017578125, 15.872100830078125, -59.93157958984375, 16.890356063842773, 28.858123779296875, 69.26556396484375, 6.798282623291016, 44.079559326171875, -5.773223876953125, 95.28775024414062, -16.019500732421875, 125.51754760742188, 43.484619140625, -64.46044921875, 117.44308471679688, -41.99211120605469, 39.384033203125, -29.979827880859375, -6.604644775390625, 24.023895263671875, -3.742950439453125, 33.81022644042969, 54.24569320678711, 43.22602844238281, 34.75665283203125, 87.63616943359375, 3.811351776123047, 12.176239013671875, -6.83984375, 6.49267578125, -1.29266357421875, 29.516677856445312, 17.221267700195312, 51.0643310546875, 18.416015625, 44.34759521484375, 16.968017578125, -1.1259765625, -1.9440460205078125, -15.3067626953125, 46.331329345703125, 90.05014038085938, -123.755126953125, 6.321075439453125, 62.307403564453125, 34.417816162109375, -94.47430419921875, 3.9903717041015625, 37.433929443359375, 85.77615356445312, 0.778717041015625, -36.646270751953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000126.npy"}
{"epoch": 0.2638743455497382, "step": 127, "batch_size": 128, "mean": 26.932846069335938, "std": 50.09208297729492, "min": -131.0501708984375, "p10": -23.63489685058594, "median": 21.006305694580078, "p90": 101.34194717407226, "max": 180.3934326171875, "pos_frac": 0.7109375, "sample": [-9.56495475769043, 5.637115478515625, -20.13677978515625, 28.318389892578125, 143.98455810546875, 7.517246246337891, 7.8326416015625, 112.0067138671875, 22.0928955078125, 135.5237274169922, -13.933929443359375, 21.06353759765625, 48.096435546875, -21.45062255859375, 30.046737670898438, 29.29742431640625, 32.17417907714844, 85.87823486328125, -42.51715087890625, 12.069961547851562, -18.093889236450195, 21.85205078125, 16.29973030090332, 16.686325073242188, 18.495269775390625, 32.019073486328125, 72.33224487304688, 13.83731460571289, 51.903839111328125, 103.46337890625, -32.32801055908203, 16.720184326171875, 24.0836181640625, 12.540298461914062, 59.3853759765625, 12.031532287597656, -29.266563415527344, -27.759963989257812, 2.310455322265625, 20.949073791503906, 27.500762939453125, 10.6630859375, 49.125518798828125, 16.026641845703125, 15.119873046875, -20.105628967285156, 40.17437744140625, 60.201507568359375, 103.33621215820312, 86.488525390625, 28.594482421875, -2.70654296875, 26.49908447265625, -2.5396881103515625, -13.918136596679688, 17.256851196289062, 97.16546630859375, 69.26107788085938, 10.55401611328125, 180.3934326171875, 72.19570922851562, 59.90771484375, -22.02753448486328, 6.7276458740234375, -33.527610778808594, -131.0501708984375, 31.4403076171875, -21.761520385742188, 105.77346801757812, -0.3350181579589844, -30.2646484375, -9.124847412109375, 54.284942626953125, 95.80364990234375, 60.71734619140625, 52.011260986328125, 71.3704833984375, 37.25813293457031, -2.91790771484375, 21.253326416015625, -8.354042053222656, -3.3788280487060547, 23.3292236328125, -119.22175598144531, 100.9249496459961, 12.351104736328125, 156.34820556640625, 112.35562133789062, 104.25338745117188, 72.298095703125, 102.31494140625, -23.579315185546875, 68.47601318359375, 37.675537109375, 17.16082763671875, 68.7723388671875, 71.53390502929688, 51.63625717163086, -6.1959075927734375, -62.6263427734375, -102.39564514160156, -34.54022216796875, 36.753662109375, 67.14334869384766, -5.838157653808594, 35.18653106689453, 54.424774169921875, 0.876007080078125, 1.08709716796875, 6.25103759765625, 102.86112976074219, 20.77178955078125, 0.9560546875, -16.449844360351562, 25.239501953125, -12.850357055664062, -1.48614501953125, -23.76458740234375, 14.37054443359375, 25.15618896484375, 57.01373291015625, -15.95440673828125, 29.744064331054688, -7.29986572265625, 42.32513427734375, -31.58978271484375, 77.87921142578125, 107.23574829101562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000127.npy"}
{"epoch": 0.26596858638743454, "step": 128, "batch_size": 128, "mean": 31.084117889404297, "std": 51.538673400878906, "min": -100.61419677734375, "p10": -30.495075225830078, "median": 29.88111114501953, "p90": 96.4814239501953, "max": 157.430419921875, "pos_frac": 0.7421875, "sample": [53.4912109375, 75.57803344726562, 88.27767944335938, 32.84454345703125, -53.12701416015625, 103.36051940917969, -13.234291076660156, 98.5819091796875, 9.699851989746094, 61.67625427246094, 60.57215881347656, 71.34548950195312, 61.83343505859375, 73.24961853027344, 33.781219482421875, 65.2052001953125, 3.207550048828125, 19.094833374023438, 88.49957275390625, 63.2325439453125, -30.305946350097656, -9.43597412109375, 22.833251953125, 10.759590148925781, 97.75624084472656, 43.22705078125, -26.303314208984375, 43.003387451171875, -90.50497436523438, -15.6822509765625, 141.20944213867188, -13.616546630859375, -94.70233154296875, -70.232421875, -28.178466796875, 49.41114807128906, -37.158203125, 14.427749633789062, -19.724563598632812, 38.72093200683594, -26.229400634765625, -12.263710021972656, 95.93507385253906, 115.46697998046875, 11.690559387207031, 155.73928833007812, -11.165557861328125, -15.105056762695312, 34.57781982421875, 9.959327697753906, 25.914764404296875, 72.98793029785156, 114.6279296875, -30.828834533691406, 11.23138427734375, 108.49510192871094, 79.7001953125, -100.61419677734375, 56.460906982421875, -52.154205322265625, -52.913360595703125, 24.56646728515625, 42.35662841796875, 54.5751953125, 95.0264892578125, 43.010009765625, 89.7706298828125, -16.555450439453125, 47.867881774902344, -54.127227783203125, -30.352035522460938, 24.723602294921875, 23.268829345703125, 28.072372436523438, -14.04132080078125, 30.450775146484375, 17.402587890625, 36.9562873840332, 64.23056030273438, 76.56597900390625, 63.12060546875, 44.79302978515625, 83.78350830078125, 69.69427490234375, 69.4633560180664, 84.66067504882812, -22.284286499023438, 41.253814697265625, 55.92291259765625, 91.58355712890625, -28.478668212890625, -40.197174072265625, 3.915557861328125, 28.36981201171875, 59.43016052246094, 104.60076904296875, 94.38229370117188, 14.063568115234375, 152.8260498046875, 7.278738021850586, 116.65135955810547, -41.45906066894531, 14.054645538330078, 37.2639045715332, 54.627288818359375, 41.17352294921875, 20.887924194335938, 13.787679672241211, 59.1334228515625, 29.311447143554688, 0.1392364501953125, 41.65021514892578, 35.86853790283203, -31.818817138671875, 2.1209716796875, 17.43280792236328, 110.46649169921875, 6.09075927734375, 20.3270263671875, 0.0, 42.04924011230469, 21.815948486328125, -18.60650634765625, 0.4771156311035156, 157.430419921875, 38.20310974121094, 18.15179443359375, -4.601371765136719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000128.npy"}
{"epoch": 0.2680628272251309, "step": 129, "batch_size": 128, "mean": 27.76416015625, "std": 51.73335266113281, "min": -113.84974670410156, "p10": -25.436978149414053, "median": 24.54279327392578, "p90": 89.18837509155273, "max": 241.7076416015625, "pos_frac": 0.734375, "sample": [-20.141082763671875, 85.4976806640625, -42.42546081542969, 22.78387451171875, 131.24009704589844, 30.622283935546875, 31.12805938720703, -101.5728759765625, 55.03349304199219, 92.69844055175781, 10.982192993164062, -34.83100891113281, 2.954345703125, 26.18640899658203, 75.87791442871094, 33.5584716796875, 116.81280517578125, -1.53179931640625, 23.027847290039062, 44.824493408203125, -4.90264892578125, -54.220184326171875, -44.894683837890625, 22.058746337890625, 64.73307800292969, 3.8860321044921875, 5.58782958984375, 37.12012481689453, 38.693115234375, 27.524261474609375, -1.7946929931640625, 98.14761352539062, 2.6793289184570312, 91.44684600830078, 70.67001342773438, 102.6217041015625, 69.71600341796875, 38.29443359375, 158.3408203125, 28.296279907226562, 38.40223693847656, 72.71469116210938, -12.104461669921875, 34.011627197265625, 37.84333801269531, 86.88739013671875, -5.177001953125, 0.6441669464111328, 44.111083984375, -83.76641845703125, 13.70452880859375, 26.0577392578125, 100.21540832519531, 9.841476440429688, -12.144775390625, -12.752193450927734, 7.481420516967773, -91.63760375976562, 83.12254333496094, -36.02552795410156, 7.103492736816406, 86.670166015625, -22.67919921875, 28.096923828125, 98.48001098632812, 27.025634765625, 65.99951171875, 8.951156616210938, 16.89672088623047, 22.040283203125, 51.57061767578125, 57.50897216796875, 67.82455444335938, -113.84974670410156, 77.27796936035156, 1.2296066284179688, -8.696334838867188, 52.6474609375, 19.992813110351562, 44.06486511230469, 4.4194488525390625, -6.883270263671875, 19.73797607421875, -69.29217529296875, -81.4920425415039, -43.13018798828125, 9.271636962890625, -7.2083740234375, 30.75555419921875, 138.6224365234375, -31.871795654296875, -9.2266845703125, 26.69793701171875, 10.55052375793457, 14.183456420898438, 55.35020446777344, 28.65753173828125, 9.13763427734375, -4.502479553222656, -2.2431793212890625, -0.614410400390625, 99.30154418945312, -12.738494873046875, 10.542804718017578, 241.7076416015625, 88.220458984375, 54.37176513671875, 86.78900146484375, 8.839046478271484, 6.804313659667969, 55.31427001953125, 75.7765884399414, 64.6968765258789, 43.82893753051758, -1.2875518798828125, 108.33599853515625, 30.42554473876953, 35.921875, 51.79991149902344, 17.94837188720703, -12.423599243164062, 0.97760009765625, -17.564132690429688, 79.638916015625, 3.6828460693359375, -1.9335556030273438, 62.35479736328125, 55.24561309814453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000129.npy"}
{"epoch": 0.27015706806282724, "step": 130, "batch_size": 128, "mean": 28.2092342376709, "std": 51.753883361816406, "min": -97.44448852539062, "p10": -36.65299682617187, "median": 25.379194259643555, "p90": 94.13809814453124, "max": 170.1627197265625, "pos_frac": 0.6953125, "sample": [51.348297119140625, 16.29718017578125, 47.94462585449219, -14.18267822265625, 0.0, -17.55523681640625, 49.723602294921875, 31.772918701171875, 2.016510009765625, -38.18635559082031, 12.162864685058594, 14.503662109375, -7.99005126953125, -3.6006393432617188, 74.84465026855469, -45.641563415527344, 42.230010986328125, 134.23619079589844, 40.84710693359375, -21.1549072265625, -27.390228271484375, 16.741180419921875, 4.1280517578125, 92.373779296875, 87.96240234375, 42.654541015625, 128.47796630859375, 21.923583984375, -40.33074951171875, -12.754547119140625, 38.50263977050781, 51.977508544921875, 31.110031127929688, 30.535110473632812, 45.336639404296875, 70.85072326660156, 58.942901611328125, -72.70474243164062, -13.15372085571289, 93.59542846679688, 30.056068420410156, 69.27053833007812, -25.700519561767578, 69.33184814453125, 18.868614196777344, 67.2126693725586, -62.3155517578125, -27.59942626953125, -38.14703369140625, 9.724777221679688, 100.9757080078125, 10.160087585449219, 170.1627197265625, 83.5328369140625, 142.9156951904297, 17.416038513183594, 29.12890625, 1.8792858123779297, 20.498504638671875, -1.472564697265625, 133.76434326171875, 46.25274658203125, -3.237581253051758, 48.62367248535156, 81.08960723876953, -43.30224609375, -8.654048919677734, 119.367431640625, 81.36239624023438, 19.896255493164062, 89.19903564453125, -10.883193969726562, -5.412576675415039, 54.789398193359375, -1.81396484375, 93.5238037109375, 49.323429107666016, 4.179412841796875, 100.36669921875, -43.864013671875, 125.08059692382812, 11.2801513671875, 2.4328765869140625, 71.93161010742188, -78.4361572265625, 60.88720703125, 32.74394989013672, 41.3875732421875, 108.66287231445312, -6.326202392578125, 1.75372314453125, 42.51971435546875, 18.843162536621094, -25.48626708984375, 9.998825073242188, 47.30498504638672, 72.87876892089844, 59.87310791015625, -17.0989990234375, 22.207550048828125, -89.3011474609375, -97.44448852539062, -5.387725830078125, 108.02680969238281, 129.26834106445312, 78.71144104003906, -79.97491455078125, 9.067474365234375, -31.6075439453125, 34.16275405883789, 53.37286376953125, 80.18910217285156, -6.5765838623046875, 42.637786865234375, 12.643325805664062, 32.71436309814453, 51.797332763671875, -5.3961334228515625, 46.222076416015625, -2.1370983123779297, 11.162567138671875, 28.550838470458984, 44.211639404296875, -67.54000854492188, 41.9449462890625, -36.0126953125, 20.76888084411621, 95.40432739257812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000130.npy"}
{"epoch": 0.27225130890052357, "step": 131, "batch_size": 128, "mean": 22.760507583618164, "std": 55.77760314941406, "min": -130.32888793945312, "p10": -49.08434906005859, "median": 24.510326385498047, "p90": 99.47562561035156, "max": 151.53427124023438, "pos_frac": 0.6875, "sample": [40.48887634277344, 13.378156661987305, 66.56741333007812, -43.005035400390625, 39.35551452636719, 0.25958251953125, -4.3157958984375, 4.47821044921875, 2.87188720703125, -103.07379150390625, 33.43121337890625, 114.79782104492188, 44.95994567871094, 27.96710205078125, -1.4719772338867188, 5.7093505859375, 43.9786376953125, -10.97726821899414, 8.535751342773438, 97.09259033203125, -4.539302825927734, 25.528648376464844, 5.182830810546875, 39.73602294921875, 46.14019775390625, -55.953094482421875, 0.4024009704589844, 37.6728515625, 39.3087158203125, 38.576202392578125, -83.81033325195312, -4.912689208984375, 43.231353759765625, 67.36267852783203, 18.50077247619629, 36.69898986816406, -62.53619384765625, 76.5452880859375, 0.33148193359375, 141.61050415039062, 30.17022705078125, 6.0587158203125, -14.034576416015625, -3.0518016815185547, -34.865966796875, 36.8167724609375, 104.38468933105469, 100.1553955078125, 119.39556884765625, 32.985137939453125, -130.32888793945312, -61.768218994140625, 18.280227661132812, -54.005584716796875, 71.66961669921875, 10.233863830566406, 55.364471435546875, 73.87234497070312, -37.721435546875, 73.19735717773438, 56.74235534667969, -40.78631591796875, 55.244171142578125, 12.771240234375, -74.7908935546875, 150.46734619140625, 53.88905334472656, 70.410400390625, 9.1322021484375, 31.87761688232422, 19.142505645751953, -32.841888427734375, 64.87487030029297, 4.38970947265625, -21.522796630859375, 67.7449951171875, -63.17132568359375, -39.03680419921875, 62.386322021484375, 124.2130126953125, 62.3780517578125, 4.323385238647461, 2.12017822265625, 25.0360107421875, 119.12083435058594, 67.5648193359375, 0.466888427734375, -64.52227783203125, 99.18429565429688, -29.5018310546875, 24.64086151123047, -14.497360229492188, 51.1837158203125, 49.21814727783203, 16.7364501953125, -14.507293701171875, 132.92498779296875, 38.816619873046875, -78.8927001953125, -21.475067138671875, 60.88165283203125, 14.559913635253906, 140.64187622070312, -49.292877197265625, 44.645713806152344, 66.2489013671875, 53.50129699707031, 105.12240600585938, -28.715896606445312, -35.9443359375, -17.83038330078125, 59.42625427246094, -101.04841613769531, 151.53427124023438, -27.226757049560547, 56.227935791015625, 108.53817749023438, 15.380615234375, 72.75518798828125, 85.91238403320312, 24.379791259765625, -5.224090576171875, 47.291473388671875, 27.43785858154297, -15.40118408203125, -48.99497985839844, -47.672454833984375, -8.157173156738281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000131.npy"}
{"epoch": 0.2743455497382199, "step": 132, "batch_size": 128, "mean": 31.701095581054688, "std": 57.722251892089844, "min": -131.9840087890625, "p10": -36.467369079589844, "median": 24.407663345336914, "p90": 99.88106689453124, "max": 182.58099365234375, "pos_frac": 0.7109375, "sample": [-5.9276123046875, 25.44830322265625, 67.28411865234375, 78.65190887451172, 37.497955322265625, 13.315673828125, -47.87762451171875, -37.807586669921875, -4.277923583984375, 56.80052185058594, 68.025146484375, 176.93255615234375, 12.14312744140625, 24.27670669555664, 73.24095153808594, -5.060752868652344, 84.69602966308594, 15.4918212890625, 4.345691680908203, 23.364898681640625, -21.211715698242188, 103.42025756835938, -2.841796875, 7.43426513671875, -0.4266510009765625, 78.27395629882812, 90.97672271728516, -15.18255615234375, -27.442947387695312, 34.88920593261719, -88.88084411621094, 43.320770263671875, 10.861297607421875, -34.89239501953125, 64.01024627685547, 7.7202911376953125, 22.297161102294922, 42.689178466796875, -4.7403564453125, 7.907470703125, 88.78781127929688, 82.1484375, 82.70294189453125, 99.41162109375, 51.624237060546875, 94.79942321777344, 41.822418212890625, 0.823699951171875, -20.4085693359375, 33.739959716796875, 19.882232666015625, 10.937095642089844, 62.562774658203125, 71.43734741210938, 13.67901611328125, -1.5334739685058594, -131.9840087890625, 110.81576538085938, 18.144882202148438, 115.7991943359375, 1.21246337890625, 88.33978271484375, -48.139923095703125, 47.94830322265625, -33.10101318359375, 108.91455078125, 39.65473937988281, -56.27082061767578, 174.04901123046875, 77.56790161132812, -35.89299011230469, -78.82415771484375, -2.6000442504882812, 6.343082427978516, 3.2605667114257812, -85.82765197753906, -6.65386962890625, 67.81295776367188, 159.7532958984375, -4.679450988769531, 21.696029663085938, 129.4790802001953, -31.16693115234375, 24.20928955078125, 60.511962890625, 125.01077270507812, 16.569244384765625, 42.839263916015625, -15.228515625, 30.019622802734375, 56.39605712890625, 25.129684448242188, 63.34252166748047, 149.22344970703125, -98.05158996582031, 2.64727783203125, -42.6661376953125, 65.4674072265625, 51.4609375, -28.625946044921875, -25.125057220458984, 3.0669021606445312, 44.4197998046875, -1.524566650390625, 39.362091064453125, 70.7491455078125, 2.0828399658203125, 162.1639404296875, 70.71673583984375, 32.111846923828125, 39.28071594238281, -44.41534423828125, 0.0, 98.58038330078125, 24.538619995117188, 11.84521484375, 0.0, -43.364837646484375, 75.28775024414062, 100.9764404296875, 68.19500732421875, 33.07836151123047, 84.76734924316406, 182.58099365234375, -53.747802734375, 77.7447509765625, 12.101531982421875, 97.19869995117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000132.npy"}
{"epoch": 0.2764397905759162, "step": 133, "batch_size": 128, "mean": 28.95733642578125, "std": 52.77851486206055, "min": -116.65325927734375, "p10": -27.967331314086913, "median": 22.571788787841797, "p90": 98.1313995361328, "max": 150.53863525390625, "pos_frac": 0.6953125, "sample": [44.43682861328125, -29.152050018310547, -52.169647216796875, 0.0, -9.339700698852539, 2.6163787841796875, 78.29684448242188, 131.4288330078125, 2.55517578125, 84.05712890625, 11.988365173339844, -15.53863525390625, -5.6181640625, 26.16180419921875, 56.08148193359375, 71.46942138671875, 58.655113220214844, 0.5048675537109375, 51.29316711425781, 101.0582275390625, 21.9166259765625, 43.53253173828125, 89.09353637695312, 96.87704467773438, 8.727519989013672, -25.075729370117188, 53.257049560546875, -52.210693359375, 17.06048583984375, -18.65313720703125, -81.3896484375, 83.77230834960938, 37.5206298828125, 21.526290893554688, 0.010295867919921875, 16.519840240478516, 24.161895751953125, 47.7734375, -58.0006103515625, 42.78009033203125, 61.467376708984375, 121.32611083984375, 124.95343017578125, -19.27484893798828, -6.278564453125, -1.8556060791015625, 106.31478881835938, -12.66778564453125, 88.2384033203125, 81.19064331054688, -52.8453369140625, -17.756317138671875, 5.5485076904296875, 121.89607238769531, 37.63953399658203, 75.10812377929688, 94.43246459960938, 11.4798583984375, 3.2342529296875, 53.685829162597656, -116.65325927734375, 26.41710662841797, -13.888725280761719, 42.72369384765625, -15.05087661743164, 17.57354736328125, 67.13945007324219, -20.666839599609375, 80.90374755859375, 22.331985473632812, -26.780548095703125, -24.2218017578125, 50.39056396484375, 93.49679565429688, 42.265419006347656, 107.55401611328125, 23.169219970703125, 150.53863525390625, 7.929962158203125, 86.72854614257812, 45.993682861328125, 1.8727264404296875, 135.19735717773438, 5.189619064331055, -27.4595947265625, 13.467365264892578, -3.683349609375, 69.95791625976562, 9.29425048828125, 43.773406982421875, -16.606735229492188, 94.76553344726562, 5.672271728515625, 43.187530517578125, 69.93681335449219, -46.86503601074219, -11.23333740234375, 58.626129150390625, 78.61605834960938, 150.08694458007812, 22.81159210205078, -25.8079833984375, -99.3175048828125, 14.30609130859375, 0.0, 15.838760375976562, 102.146728515625, 37.780303955078125, -33.684295654296875, 29.14029312133789, 51.304412841796875, -11.2867431640625, -6.807243347167969, -53.03765869140625, 10.247734069824219, 141.6348876953125, -54.99687957763672, 120.49880981445312, 26.559814453125, 40.43414306640625, 30.126876831054688, 43.49455261230469, -7.275794982910156, -29.833251953125, -11.2083740234375, 4.396329879760742, 85.4512939453125, 88.11012268066406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000133.npy"}
{"epoch": 0.27853403141361255, "step": 134, "batch_size": 128, "mean": 21.40724754333496, "std": 47.65044403076172, "min": -117.0848388671875, "p10": -31.1441047668457, "median": 18.05810546875, "p90": 79.21520538330078, "max": 178.75289916992188, "pos_frac": 0.6875, "sample": [49.079864501953125, 58.01069641113281, -58.610198974609375, -12.0020751953125, 39.129974365234375, 13.996513366699219, 52.98152160644531, -30.309860229492188, 10.060443878173828, 72.8162612915039, 91.26773071289062, 4.085573196411133, 131.39752197265625, 19.7598876953125, 18.1226806640625, 73.20464324951172, -77.0877685546875, 148.2802734375, -13.41168212890625, -4.1546630859375, 42.7818603515625, 38.3419189453125, 12.22528076171875, -73.212646484375, 73.84130859375, 15.955657958984375, 31.975975036621094, 3.64984130859375, 27.55028533935547, 79.43255615234375, 53.8946533203125, 90.16242980957031, -79.0911865234375, 7.879150390625, 40.7630615234375, 14.825958251953125, 45.71153259277344, 42.20533752441406, 80.28546142578125, 57.1563720703125, 57.0372314453125, 70.57098388671875, 0.0, 113.14971923828125, 58.9932861328125, -33.090675354003906, 22.143211364746094, -21.964683532714844, 34.963226318359375, 50.59038543701172, 1.4278373718261719, 5.59637451171875, 40.00776672363281, 74.33355712890625, -22.520782470703125, -104.46014404296875, -0.331787109375, 3.5219650268554688, 43.26365661621094, 52.63160705566406, 40.63337707519531, -13.766098022460938, 6.52703857421875, -16.041946411132812, -17.093215942382812, 33.8109130859375, -117.0848388671875, -21.123062133789062, -60.14939880371094, 38.91613006591797, 35.276611328125, 43.03948974609375, 73.4967041015625, 62.956390380859375, 43.35003662109375, -10.187835693359375, 53.874267578125, 81.56782531738281, 95.75543212890625, 3.2887344360351562, -19.78429412841797, -4.64991569519043, 13.243804931640625, 95.50672912597656, -8.558456420898438, 33.05120849609375, -11.427978515625, -13.3814697265625, 81.7330322265625, 5.838602066040039, -9.551963806152344, 20.67755126953125, 17.9935302734375, 13.092620849609375, -29.673477172851562, -13.5966796875, 20.887664794921875, 51.88909912109375, 79.12205505371094, 1.8339653015136719, 178.75289916992188, 27.585540771484375, 3.02294921875, -26.51983642578125, 44.1656494140625, 100.4727783203125, -48.60725402832031, -7.1622161865234375, -40.20252990722656, -8.31396484375, 6.526885986328125, -35.666778564453125, -0.055267333984375, 9.46527099609375, 10.224029541015625, -73.6624755859375, 33.877685546875, 1.378875732421875, -3.8145580291748047, 60.67949676513672, 27.837486267089844, 38.678558349609375, -46.88037109375, 41.390167236328125, 60.2099609375, 60.09132385253906, 6.5462493896484375, 0.0], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000134.npy"}
{"epoch": 0.2806282722513089, "step": 135, "batch_size": 128, "mean": 25.540082931518555, "std": 54.75404357910156, "min": -194.7065887451172, "p10": -28.12160835266113, "median": 17.31414031982422, "p90": 104.32616577148437, "max": 146.6170654296875, "pos_frac": 0.71875, "sample": [-4.770069122314453, -14.124069213867188, 5.072744369506836, 69.19999694824219, 65.53369140625, 3.56671142578125, 3.257781982421875, 117.56900024414062, 17.093795776367188, 4.4649658203125, 17.53448486328125, 15.249916076660156, -8.2735595703125, -54.951904296875, 12.669448852539062, 59.573455810546875, 2.422760009765625, 41.62408447265625, 26.6103515625, 44.760650634765625, 33.6627197265625, 54.746116638183594, -34.90216064453125, 102.64993286132812, 15.03192138671875, -49.78871154785156, 2.685100555419922, 2.45916748046875, 11.259124755859375, 19.85089111328125, 0.0, 31.442779541015625, -23.395736694335938, 141.22198486328125, 2.712331771850586, 5.18939208984375, 1.4839630126953125, -12.8446044921875, 42.121734619140625, 5.7733306884765625, 20.841629028320312, 33.13045883178711, -4.703285217285156, 13.704978942871094, 5.3223419189453125, -11.877883911132812, -8.944015502929688, -27.364028930664062, 70.43515014648438, 14.174263000488281, 55.50028991699219, 33.38874053955078, 19.637847900390625, 15.5491943359375, -0.0342864990234375, 146.6170654296875, -46.763458251953125, 50.1405029296875, -22.084014892578125, 120.47467041015625, -36.60198974609375, -112.89918518066406, -5.123348236083984, 95.0490951538086, 66.26188659667969, 18.607877731323242, 113.1976318359375, 37.645111083984375, 16.011428833007812, 33.61981201171875, 38.36631774902344, 73.97201538085938, -43.45013427734375, -0.14102935791015625, 46.120635986328125, 99.4010009765625, 105.52810668945312, 16.84393310546875, 75.72354125976562, 33.42242431640625, 103.84454345703125, 14.547470092773438, 52.47454833984375, 59.369964599609375, -9.50299072265625, -194.7065887451172, -29.889293670654297, -0.60064697265625, 40.270751953125, 143.72323608398438, 30.837860107421875, 105.449951171875, 11.003448486328125, 106.04803466796875, 9.7626953125, 77.01083374023438, -44.419189453125, -25.06500244140625, 58.28692626953125, 35.828125, 128.84103393554688, 21.0721435546875, 114.5130615234375, 51.4661865234375, 42.524017333984375, 72.93585205078125, 14.402420043945312, 2.2831687927246094, 97.96822357177734, 19.01837158203125, 64.33733367919922, 13.97149658203125, 45.11918640136719, -14.14776611328125, -6.9740142822265625, 83.4598388671875, 110.36309814453125, 74.23614501953125, 28.024658203125, -33.22222900390625, -24.10211181640625, -155.02825927734375, 140.63760375976562, -22.707046508789062, 43.966552734375, -82.48562622070312, -26.29351806640625, -3.4686279296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000135.npy"}
{"epoch": 0.28272251308900526, "step": 136, "batch_size": 128, "mean": 28.542394638061523, "std": 58.8564338684082, "min": -122.30255126953125, "p10": -35.59010620117187, "median": 24.33392333984375, "p90": 94.68327484130859, "max": 230.4234619140625, "pos_frac": 0.6953125, "sample": [48.452392578125, 19.328582763671875, 25.0379638671875, 34.67375946044922, 10.91754150390625, 1.442138671875, 64.28966522216797, -3.4258270263671875, 90.45364379882812, 36.24537658691406, 3.16790771484375, -26.569366455078125, 57.213897705078125, 18.15240478515625, 56.165863037109375, -100.6767578125, 28.07433319091797, -31.448516845703125, -49.67249298095703, 31.046142578125, 121.1517333984375, -3.0843124389648438, 39.876930236816406, 63.884521484375, 62.04864501953125, -50.750030517578125, 44.6644287109375, 150.44512939453125, -51.605560302734375, -4.00701904296875, 2.1477813720703125, 37.4818115234375, 112.7373275756836, 85.8802490234375, 94.00694274902344, 69.48463439941406, -38.0697021484375, 25.704376220703125, 25.817153930664062, -19.680450439453125, -103.36373901367188, 17.206756591796875, -37.26409912109375, -8.214012145996094, 129.73019409179688, 67.66337585449219, 129.93634033203125, 189.07778930664062, 107.68453979492188, -13.34039306640625, 35.820953369140625, 2.9078369140625, -1.6158943176269531, 40.6782112121582, 23.6298828125, 14.70029067993164, 74.22515869140625, 51.515106201171875, 21.336708068847656, -8.746328353881836, 230.4234619140625, -3.1434173583984375, -19.5792236328125, 51.62687683105469, 28.66552734375, 68.55892944335938, -4.081962585449219, 6.441108703613281, 18.89508056640625, -29.155364990234375, 91.9737548828125, -117.017578125, -59.193634033203125, -7.2191009521484375, -34.8726806640625, -88.36651611328125, -28.356109619140625, 25.860366821289062, 3.2784423828125, 80.64628601074219, 15.597373962402344, 47.34295654296875, 68.48007202148438, 165.17327880859375, 104.46728515625, -122.30255126953125, -24.142379760742188, 58.229034423828125, 8.804389953613281, 96.26138305664062, -4.854698181152344, 22.41912841796875, 53.51654052734375, 68.53933715820312, 77.8093490600586, 3.7997894287109375, 77.19955444335938, 13.47430419921875, 61.77925491333008, 80.81497192382812, 70.53121948242188, -13.917472839355469, 15.30908203125, -3.6479949951171875, 45.4095458984375, 16.865753173828125, 9.564220428466797, -12.531280517578125, 159.220703125, -1.3277587890625, 73.33636474609375, 17.993148803710938, -4.064279556274414, 51.2115478515625, 67.22760772705078, -61.82366943359375, 100.7879638671875, 64.29971313476562, 80.61080932617188, 33.72688674926758, -30.9417724609375, 55.499778747558594, 11.212127685546875, -31.17729949951172, 12.999809265136719, -95.07015991210938, 56.504364013671875, 63.253021240234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000136.npy"}
{"epoch": 0.2848167539267016, "step": 137, "batch_size": 128, "mean": 25.485727310180664, "std": 55.23785400390625, "min": -106.12310791015625, "p10": -43.05842895507812, "median": 18.257720947265625, "p90": 113.82890014648437, "max": 173.61187744140625, "pos_frac": 0.6953125, "sample": [-84.3731689453125, 7.636322021484375, 56.628173828125, 9.490798950195312, 64.86705017089844, 139.65106201171875, -31.731842041015625, -18.3388671875, -8.876968383789062, 114.37103271484375, -11.150634765625, -35.120086669921875, -87.74270629882812, 11.221221923828125, -22.96661376953125, 113.55474853515625, 16.37908935546875, 120.05746459960938, -61.83879089355469, 1.3704605102539062, 113.5965576171875, 12.09958267211914, 39.480323791503906, 63.61711120605469, 14.4920654296875, 116.0703125, 20.0594482421875, 125.8175048828125, -32.5213623046875, 48.88653564453125, 56.1092529296875, 77.86146545410156, 95.31771850585938, 25.22931671142578, 173.61187744140625, -59.85003662109375, 61.30732727050781, 25.988502502441406, 4.0086669921875, -45.529876708984375, 53.46350860595703, 34.0626220703125, 20.39093017578125, -4.2357177734375, 8.379959106445312, 18.3258056640625, 28.48028564453125, 58.80522155761719, 1.229736328125, -48.82391357421875, 5.09857177734375, 30.09210205078125, -66.06698608398438, -41.999237060546875, 68.36376953125, -96.17861938476562, 10.4608154296875, 51.332122802734375, 70.18975830078125, 37.761932373046875, -22.069564819335938, 24.397201538085938, -67.65451049804688, 10.549293518066406, 17.474197387695312, -5.36688232421875, 20.187026977539062, 14.846878051757812, 23.656341552734375, -0.6618213653564453, 26.824241638183594, -6.60076904296875, -106.12310791015625, 19.09564971923828, -4.8231201171875, -54.02952575683594, 12.48687744140625, 79.81402587890625, -29.85711669921875, 10.79046630859375, 128.69708251953125, -52.41822814941406, 123.22084045410156, 41.12786102294922, -1.058349609375, 120.6309814453125, -24.509227752685547, 46.57464599609375, 20.708969116210938, 101.81903076171875, 7.7045745849609375, -18.088333129882812, -6.4196929931640625, 17.386455535888672, 42.04071044921875, 42.50299072265625, 41.91644287109375, 142.08233642578125, 14.976242065429688, 3.12451171875, -54.9840087890625, 21.75030517578125, 33.51288604736328, -27.462936401367188, 82.8353271484375, -11.664764404296875, 122.39743041992188, 18.18963623046875, -5.52874755859375, -2.4631805419921875, -11.75927734375, 61.892730712890625, -2.729734420776367, 112.64285278320312, 78.30569458007812, 141.6385498046875, 53.75291442871094, 19.157123565673828, 42.405487060546875, 54.23118591308594, -10.011436462402344, 18.16546630859375, 3.166637420654297, 154.07693481445312, 56.34248352050781, 61.1612548828125, 11.328369140625, 18.99561309814453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000137.npy"}
{"epoch": 0.2869109947643979, "step": 138, "batch_size": 128, "mean": 29.769168853759766, "std": 64.07267761230469, "min": -108.565185546875, "p10": -49.54883728027343, "median": 25.714378356933594, "p90": 112.72762145996093, "max": 177.701416015625, "pos_frac": 0.6328125, "sample": [177.5924072265625, 45.7652587890625, 60.45079040527344, -48.23651885986328, 13.8450927734375, 32.30058670043945, 8.402751922607422, 49.89714050292969, -80.16122436523438, 34.114593505859375, -56.428314208984375, -44.93385314941406, 42.65069580078125, -18.095474243164062, 39.27970886230469, 45.794158935546875, 110.28704833984375, -81.46902465820312, 98.7220458984375, 91.23741149902344, 31.862350463867188, 53.622955322265625, -31.095184326171875, -1.7828216552734375, 57.711944580078125, 76.5728759765625, 83.8421630859375, 65.6044921875, 15.656539916992188, 126.42303466796875, 9.5372314453125, 53.204654693603516, -39.61204528808594, 37.687225341796875, 27.6695556640625, 138.64634704589844, -52.268280029296875, 97.91155242919922, 82.49981689453125, -22.477615356445312, -4.834442138671875, 61.24908447265625, -79.30796813964844, 19.999618530273438, 94.36782836914062, -40.318939208984375, 129.51400756835938, 11.736846923828125, 70.63961791992188, -11.1907958984375, -62.00859069824219, 43.2003173828125, 107.46868896484375, 11.374954223632812, 177.701416015625, -48.38336181640625, -105.4932861328125, -43.45033264160156, 12.60150146484375, -16.12525177001953, 156.27703857421875, -45.20367431640625, 98.69451904296875, -108.565185546875, -9.21795654296875, 115.14529418945312, 31.004600524902344, -5.38775634765625, 60.70756530761719, -4.346126556396484, 109.41609954833984, 116.05853271484375, -99.79061889648438, 63.66163635253906, 23.759201049804688, 68.2884292602539, 102.2496337890625, 100.50932312011719, 65.27656555175781, -43.461669921875, -27.031784057617188, 122.77310180664062, 47.15895080566406, -11.182785034179688, 17.60594940185547, 11.0484619140625, -25.322860717773438, -66.24337768554688, -8.048412322998047, -42.87017822265625, 55.90232849121094, 114.38723754882812, 20.677539825439453, 66.68663024902344, 63.9930419921875, -7.80694580078125, 104.21139526367188, 82.54254150390625, 13.652498245239258, -24.934776306152344, 32.47198486328125, 107.21559143066406, -0.50384521484375, 14.045757293701172, -3.5094833374023438, 140.70126342773438, -47.36358642578125, -14.760589599609375, -62.906494140625, 17.6053466796875, -5.050506591796875, -20.1329345703125, -55.753082275390625, 82.74102783203125, 134.08290100097656, 2.4200057983398438, -26.818893432617188, 112.016357421875, 6.935546875, 82.26669311523438, 176.860595703125, 71.32135009765625, -12.251354217529297, -4.14471435546875, 83.274169921875, 56.30085754394531, 34.295806884765625, -54.15330505371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000138.npy"}
{"epoch": 0.28900523560209423, "step": 139, "batch_size": 128, "mean": 20.82543182373047, "std": 56.16794967651367, "min": -98.14053344726562, "p10": -50.84348220825195, "median": 18.648380279541016, "p90": 89.66229553222657, "max": 170.21466064453125, "pos_frac": 0.625, "sample": [-79.37446594238281, 30.725677490234375, -55.38421630859375, 33.07524108886719, 89.80685424804688, 89.23403930664062, 47.964385986328125, -1.915700912475586, 114.59127807617188, 26.18665313720703, 6.693611145019531, 5.932323455810547, -22.62110137939453, 88.82867431640625, 8.239288330078125, -7.759246826171875, 86.7587890625, -21.404296875, 7.743803024291992, -1.832977294921875, 15.629318237304688, -72.46685791015625, -33.22462463378906, 3.3618202209472656, 54.41497802734375, 19.591094970703125, 54.50782775878906, 67.58184814453125, -38.596527099609375, -45.097686767578125, -72.78413391113281, -98.14053344726562, -42.6728515625, 49.786956787109375, 32.04280090332031, 48.4195556640625, 39.771392822265625, -6.079345703125, -2.79998779296875, 103.81698608398438, 6.402229309082031, 80.64791870117188, 19.366348266601562, 72.50523376464844, -46.10888671875, -57.4989013671875, 84.78856658935547, 95.770263671875, 27.19586181640625, 83.53692626953125, 1.4441680908203125, 58.114906311035156, 86.83746337890625, 26.374755859375, -13.700836181640625, 23.755897521972656, 53.376800537109375, -32.39777374267578, 127.3076171875, -70.327392578125, 0.0, 17.205718994140625, -19.571060180664062, 111.47356414794922, 21.016876220703125, -16.324874877929688, 80.4659423828125, 0.8319091796875, -54.929359436035156, -9.991531372070312, 2.149810791015625, -31.937179565429688, -22.433319091796875, 8.375076293945312, -4.04640007019043, 80.39715576171875, -28.221664428710938, 14.73956298828125, -67.83648681640625, -34.7274169921875, 30.4183349609375, 141.74954223632812, 34.655296325683594, -74.20950317382812, -27.935104370117188, -60.969512939453125, 62.35161590576172, 155.27163696289062, 36.308837890625, -7.553070068359375, -5.9610595703125, 119.9188232421875, -39.27348327636719, -43.21685791015625, 10.605331420898438, -14.10736083984375, 99.91804504394531, 32.51885986328125, -39.637542724609375, 86.20977783203125, 35.03263473510742, 20.001708984375, -92.50320434570312, -49.09239196777344, -6.459163665771484, -83.27786254882812, 6.375799179077148, -10.304557800292969, 89.600341796875, -11.212930679321289, 54.7188720703125, 30.162031173706055, 105.39605712890625, 137.75946044921875, 48.82763671875, 82.19741821289062, 17.93041229248047, 88.58474731445312, 170.21466064453125, 44.716644287109375, 35.09967041015625, 41.0594482421875, 88.8748779296875, 47.48710632324219, 27.540435791015625, -6.58953857421875, 36.826171875, 23.048011779785156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000139.npy"}
{"epoch": 0.29109947643979056, "step": 140, "batch_size": 128, "mean": 32.187522888183594, "std": 52.1945686340332, "min": -115.283935546875, "p10": -29.969738006591797, "median": 30.688663482666016, "p90": 99.98264312744139, "max": 163.15313720703125, "pos_frac": 0.7109375, "sample": [24.67401123046875, 89.041015625, 138.4799041748047, -42.37925720214844, 110.21719360351562, 98.88728332519531, -84.48507690429688, 62.984161376953125, 1.424713134765625, 96.55816650390625, 109.11334228515625, -8.9327392578125, -44.0218505859375, 163.15313720703125, 20.281471252441406, 56.08159637451172, -12.96832275390625, 83.18045043945312, 22.535015106201172, -14.6260986328125, 0.0, -2.559053421020508, 38.91461181640625, -30.15454864501953, 26.816375732421875, 11.037200927734375, -0.9409732818603516, 87.1246337890625, 92.1531982421875, 13.520774841308594, 72.05647277832031, 18.692550659179688, -15.806098937988281, 46.735931396484375, -29.23535919189453, 41.10374450683594, 48.519439697265625, -2.39453125, 102.84239196777344, 103.6956787109375, 31.693038940429688, -41.597076416015625, 59.81889343261719, -60.70086669921875, 109.31629943847656, 4.3133392333984375, 2.2445449829101562, 17.48175048828125, 0.0, -76.44178771972656, 0.954071044921875, -59.802581787109375, 3.197235107421875, 110.6322021484375, -29.890533447265625, 52.28173828125, 88.10784912109375, 57.96938705444336, 70.47610473632812, 12.91336441040039, 44.72114562988281, -10.623992919921875, 34.487579345703125, -71.80891418457031, 0.24334716796875, -9.433616638183594, -27.784767150878906, -25.131484985351562, -9.289070129394531, -64.91609191894531, 45.98106384277344, 3.6775474548339844, -18.944808959960938, 53.97027587890625, -115.283935546875, -0.2075347900390625, 40.416015625, -4.652839660644531, 14.8040771484375, 15.576667785644531, 131.1297607421875, 33.95451354980469, 34.34210205078125, 68.76527404785156, 19.4117431640625, -28.865188598632812, 70.74981689453125, 10.942535400390625, 68.28717041015625, 56.78607940673828, 19.82085609436035, -1.099853515625, 53.42510986328125, 153.1956787109375, 28.61517333984375, -3.7867355346679688, 46.11077880859375, 61.32524871826172, 20.24732208251953, 73.65127563476562, 31.392425537109375, 54.088623046875, 97.26943969726562, 33.38482666015625, 70.21063232421875, 87.8880386352539, 124.43988037109375, 78.53353881835938, 74.45596313476562, -5.188323974609375, -32.39521026611328, 58.62518310546875, 67.30108642578125, 90.80081176757812, 8.437660217285156, 77.19172668457031, 32.31669616699219, 29.984901428222656, 102.53848266601562, 2.44873046875, 89.77317810058594, 15.689773559570312, -1.4719314575195312, 94.86128234863281, 36.3818359375, -33.0799560546875, 117.20361328125, 59.823150634765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000140.npy"}
{"epoch": 0.2931937172774869, "step": 141, "batch_size": 128, "mean": 33.40550994873047, "std": 60.48497772216797, "min": -173.98202514648438, "p10": -39.372248840332034, "median": 32.950958251953125, "p90": 103.71729736328123, "max": 161.0968475341797, "pos_frac": 0.75, "sample": [131.73643493652344, 26.890838623046875, -48.65397644042969, -21.76849365234375, -37.54632568359375, 106.89190673828125, 34.32403564453125, 97.316162109375, 33.31695556640625, 94.26535034179688, 56.46118927001953, 92.27468872070312, 76.897216796875, 20.085098266601562, 33.58563232421875, 62.585540771484375, -20.913909912109375, 98.9306640625, 21.855247497558594, 34.957969665527344, 16.251113891601562, 143.05665588378906, 36.38731384277344, 58.140045166015625, 88.46316528320312, -16.87139892578125, 5.44232177734375, 102.12921142578125, 123.844970703125, -73.42808532714844, 32.5849609375, 17.36480712890625, 59.7630615234375, 59.80400848388672, 17.132904052734375, 78.64242553710938, -39.25511169433594, -81.86112213134766, 9.720443725585938, -41.658538818359375, 107.568603515625, 21.76763916015625, 37.11927032470703, 114.1302490234375, 16.16205596923828, 17.282012939453125, 94.13116455078125, 41.060455322265625, -12.280876159667969, 88.89308166503906, 6.8375244140625, 42.25447082519531, -75.76434326171875, 59.3707275390625, 49.10621643066406, 85.343505859375, -6.220283508300781, 161.0968475341797, -173.98202514648438, 22.29498291015625, 59.574615478515625, 26.294448852539062, 92.16986083984375, 24.317901611328125, -84.693359375, 145.519775390625, 7.1776885986328125, 43.627044677734375, 129.58343505859375, 55.99481201171875, 60.20391082763672, -15.1717529296875, -20.393808364868164, -29.628555297851562, 90.09710693359375, 20.230789184570312, -5.0614776611328125, 14.057884216308594, 46.68312072753906, -23.033172607421875, 28.504568099975586, 75.77750396728516, 46.46875, 75.43922424316406, 43.22760009765625, 10.348016738891602, 86.15228271484375, 102.35675048828125, 2.029449462890625, 34.27197265625, 31.679290771484375, 18.432044982910156, -32.47589111328125, 76.06671142578125, 137.3143310546875, -26.133636474609375, 98.62911987304688, 151.28854370117188, 50.3089599609375, 88.93777465820312, -55.5582389831543, 57.003509521484375, 63.115020751953125, -18.34760284423828, 16.573654174804688, 157.393310546875, 22.9410400390625, -36.7740478515625, 86.66995239257812, 91.35357666015625, -39.64556884765625, -56.968109130859375, 130.46697998046875, -24.64703369140625, 0.0, 3.616008758544922, 71.19635009765625, -52.13787841796875, 0.37308692932128906, 10.40155029296875, 12.451301574707031, -161.38922119140625, -62.031585693359375, -13.932022094726562, 53.08495330810547, 70.2532958984375, 5.4407196044921875, 23.512191772460938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000141.npy"}
{"epoch": 0.29528795811518327, "step": 142, "batch_size": 128, "mean": 26.854646682739258, "std": 61.38580322265625, "min": -167.15744018554688, "p10": -44.9729034423828, "median": 16.74403476715088, "p90": 108.64945373535154, "max": 173.96063232421875, "pos_frac": 0.6640625, "sample": [22.035400390625, 6.989372253417969, 22.909873962402344, 30.37042236328125, -2.395601272583008, -56.005584716796875, 7.492912292480469, 2.218791961669922, -20.160079956054688, -9.130081176757812, 145.0697021484375, 42.709266662597656, 48.954933166503906, 106.44073486328125, 13.4757080078125, -13.620222091674805, 150.30673217773438, 83.91604614257812, 68.8258056640625, -35.59906768798828, 101.36154174804688, 6.87371826171875, 91.783935546875, 173.96063232421875, -114.88571166992188, 104.3648681640625, 118.03912353515625, 49.403900146484375, 15.343132019042969, 54.37939453125, 66.18447875976562, -0.07904052734375, 17.949159622192383, -20.419189453125, 4.070098876953125, -8.97454833984375, 21.080780029296875, 45.45294952392578, -55.63165283203125, 56.39436340332031, 65.44125366210938, 33.69065856933594, 0.0, 96.67945861816406, 34.26904296875, 25.701278686523438, -20.611618041992188, 6.776542663574219, 88.73635864257812, 6.3684234619140625, -1.5008392333984375, -30.017166137695312, -80.0277099609375, -5.77099609375, -73.37762451171875, 1.1534423828125, -19.2083740234375, 20.643089294433594, 15.538909912109375, 135.7396240234375, 45.7039794921875, -3.669187545776367, 32.11590576171875, -26.54522705078125, 116.73248291015625, 81.52276611328125, 105.01449584960938, -42.427398681640625, 17.978515625, 113.80313110351562, 128.4232177734375, 37.19502258300781, 35.373046875, 49.710357666015625, 28.412139892578125, 0.5926361083984375, -2.1800079345703125, 95.16996765136719, -27.3343505859375, -25.805633544921875, 12.509246826171875, 10.100698471069336, -60.75537109375, 58.48222351074219, 12.775794982910156, -71.388916015625, 122.42864990234375, 11.025238037109375, 64.23634338378906, 13.205604553222656, -14.819713592529297, 7.150108337402344, -167.15744018554688, 101.309326171875, -0.3997154235839844, -5.936187744140625, -38.950103759765625, 64.3450927734375, 41.03773498535156, 72.91177368164062, 41.133544921875, -86.57699584960938, 11.032766342163086, 116.3614501953125, -0.2738838195800781, -0.2462158203125, 4.327911376953125, 36.404449462890625, -6.93267822265625, 81.5943603515625, -26.87420654296875, -78.39886474609375, -60.48297119140625, 59.97050476074219, 69.85873413085938, -50.91241455078125, 69.87991333007812, -28.825668334960938, 153.73037719726562, -117.161376953125, 95.2144775390625, 92.61752319335938, 127.18341064453125, 3.4191970825195312, -0.134429931640625, 57.48212432861328, 168.24868774414062, 72.17784118652344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000142.npy"}
{"epoch": 0.2973821989528796, "step": 143, "batch_size": 128, "mean": 32.6292724609375, "std": 54.9504280090332, "min": -123.8812255859375, "p10": -26.274612426757812, "median": 27.863444328308105, "p90": 107.64567260742187, "max": 216.68853759765625, "pos_frac": 0.7265625, "sample": [47.31135559082031, 125.31536865234375, 123.307861328125, 52.73388671875, 144.50169372558594, 84.09010314941406, 26.08702850341797, -15.308258056640625, 106.781982421875, 41.0377197265625, -42.653709411621094, 122.2374267578125, 9.829574584960938, -123.8812255859375, 46.373046875, 55.459228515625, 6.748332977294922, 3.9703903198242188, 79.9910888671875, 53.052001953125, 0.18137168884277344, 127.77239990234375, -17.40379524230957, 39.3203125, 6.088165283203125, -1.82110595703125, 9.09136962890625, 64.59078979492188, -13.941238403320312, 115.18017578125, 43.4122314453125, 115.28829956054688, 105.54946899414062, 18.78338623046875, 55.30931091308594, 17.191482543945312, -106.25393676757812, -5.874900817871094, 68.61550903320312, 2.61126708984375, -35.896514892578125, -14.97894287109375, 44.37559509277344, 99.45951843261719, 52.31322479248047, 82.32878112792969, 38.56451416015625, 51.3907470703125, 86.2369384765625, -10.539886474609375, 64.04974365234375, 88.43009948730469, 120.86672973632812, 61.517059326171875, -26.476806640625, 36.46553039550781, 13.11431884765625, -20.12982177734375, -21.318511962890625, -8.39590835571289, 54.006072998046875, 96.16110229492188, -85.3541259765625, 19.525184631347656, -26.187957763671875, -2.9858322143554688, 68.69937133789062, -12.342315673828125, 12.290199279785156, -36.01116943359375, 15.043655395507812, 91.32940673828125, 44.17906188964844, 50.05204772949219, 71.58294677734375, 127.52520751953125, -9.830078125, 27.969968795776367, 2.4189300537109375, -0.11985015869140625, 27.756919860839844, 4.1183319091796875, 5.472988128662109, -6.53619384765625, -30.021148681640625, 154.4678955078125, 109.66094970703125, -3.7941455841064453, 6.4057159423828125, 30.524887084960938, 69.05216979980469, -17.256866455078125, -62.82879638671875, 92.2783203125, -64.0118408203125, -7.105499267578125, 66.64794921875, 21.761123657226562, -7.137279510498047, 10.777069091796875, 31.353961944580078, 5.932647705078125, 35.773529052734375, -13.991241455078125, 216.68853759765625, -49.69805908203125, 1.98968505859375, 67.12522888183594, -6.045928955078125, 3.8703536987304688, 166.00723266601562, 58.03973388671875, -56.48870849609375, 47.07035827636719, 21.366989135742188, 68.95056915283203, 1.994894027709961, 16.7783203125, -31.582794189453125, 62.49066162109375, 16.83477020263672, 52.110626220703125, 74.96627807617188, 43.665618896484375, 66.47402954101562, 34.27789306640625, 36.0965576171875, 4.258977890014648], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000143.npy"}
{"epoch": 0.2994764397905759, "step": 144, "batch_size": 128, "mean": 26.424888610839844, "std": 59.68101501464844, "min": -153.419189453125, "p10": -39.75260772705078, "median": 21.706340789794922, "p90": 103.31498565673827, "max": 250.95184326171875, "pos_frac": 0.6953125, "sample": [0.6911888122558594, 75.26049041748047, 67.06707763671875, 21.11937713623047, 2.12286376953125, 197.57000732421875, 157.27474975585938, 86.48114013671875, 22.27118682861328, 77.38475036621094, 18.125892639160156, 3.428680419921875, -48.324798583984375, -54.717559814453125, 21.141494750976562, 99.04119873046875, 56.21440124511719, 13.578369140625, 70.24609375, 128.15054321289062, 15.907524108886719, -6.54107666015625, -16.7515869140625, 61.381011962890625, 35.22407531738281, 51.998130798339844, -42.59617614746094, -55.662567138671875, 102.1904296875, -15.410430908203125, 33.290924072265625, 0.979217529296875, 1.8046417236328125, 44.80938720703125, -4.9942626953125, -83.91513061523438, 67.15933227539062, 150.298828125, -10.125015258789062, 63.83937072753906, -10.253143310546875, 61.6739501953125, 59.049530029296875, 0.3951873779296875, 57.262657165527344, -153.419189453125, -110.90226745605469, -61.53660583496094, 55.03688049316406, 79.76171875, 14.728530883789062, 38.664955139160156, -28.29351806640625, -2.882669448852539, 0.0, -33.8402099609375, 105.93894958496094, 76.0549545288086, 2.022735595703125, 51.0772705078125, 63.355072021484375, 52.583396911621094, 35.224884033203125, -11.024383544921875, 0.38159942626953125, 157.16448974609375, -3.49725341796875, 1.321685791015625, -9.260751724243164, -83.54617309570312, 28.508544921875, 19.9508056640625, 8.142822265625, -5.708282470703125, 30.594154357910156, -7.955772399902344, 23.9866943359375, -23.940635681152344, -30.916336059570312, 34.7955322265625, 142.9581298828125, 30.63543701171875, 40.17439270019531, 74.53176879882812, -54.75465393066406, -29.942047119140625, 1.73724365234375, -91.29722595214844, 0.5450363159179688, 128.188720703125, 89.23977661132812, 68.93179321289062, 38.451507568359375, 46.651031494140625, 6.092620849609375, 106.77435302734375, 25.53826904296875, 108.32296752929688, 10.353759765625, 63.149932861328125, -5.28803825378418, -69.5644302368164, -24.32611083984375, 35.707794189453125, 41.42578125, -14.085966110229492, -68.819091796875, 2.886829376220703, 42.156982421875, 48.0418701171875, -38.533935546875, -1.879180908203125, 1.2701416015625, 35.89601135253906, 45.10710144042969, 250.95184326171875, 42.373321533203125, 45.84270095825195, 62.78094482421875, 17.772216796875, 40.681915283203125, 3.0690269470214844, -0.24822998046875, 110.01324462890625, -37.122894287109375, -10.535873413085938, 107.95953369140625, 24.856063842773438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000144.npy"}
{"epoch": 0.30157068062827225, "step": 145, "batch_size": 128, "mean": 33.939605712890625, "std": 55.89336395263672, "min": -109.554443359375, "p10": -26.300773239135744, "median": 36.98478698730469, "p90": 106.87394714355469, "max": 197.3524169921875, "pos_frac": 0.71875, "sample": [-12.4736328125, 55.10682678222656, -109.554443359375, 24.991668701171875, 43.75291442871094, 44.728668212890625, 188.60910034179688, 25.499237060546875, 27.6512451171875, 47.33917236328125, -18.5799560546875, 10.186225891113281, 43.97969055175781, 43.07867431640625, 49.10646057128906, -30.87347412109375, 115.4970703125, -18.38823699951172, 77.63580322265625, -16.557220458984375, 103.05731201171875, 130.28237915039062, 61.50523376464844, 56.96320343017578, -83.97356414794922, 63.08012390136719, 16.713314056396484, 38.26177978515625, 57.00767517089844, 50.5821533203125, -12.328506469726562, -7.35272216796875, 0.6703701019287109, -34.47784423828125, 70.61587524414062, 49.8404541015625, 109.9708251953125, -83.66541290283203, 30.54132080078125, 2.446044921875, -21.810157775878906, -16.72515869140625, 62.14984130859375, 15.902933120727539, -15.912857055664062, 107.12612915039062, 69.4708251953125, 62.320953369140625, 44.518157958984375, 58.41434097290039, 67.75213623046875, -27.8446044921875, -26.271015167236328, -77.64530944824219, 11.6571044921875, 57.52227020263672, 184.4031982421875, 25.200965881347656, 19.751388549804688, 85.86654663085938, 40.48927307128906, 46.64476013183594, 69.17062377929688, 79.221435546875, 117.81918334960938, -69.75680541992188, 18.90203857421875, 121.59761047363281, 42.43711853027344, 24.39776611328125, 7.657501220703125, 158.6678466796875, 6.056266784667969, 97.1925048828125, -67.7105712890625, 127.6483154296875, -21.85467529296875, -9.946231842041016, -75.51627349853516, 13.27054214477539, 47.54652404785156, 57.89532470703125, 65.9052734375, 106.765869140625, 36.1419677734375, -21.17388916015625, 65.90064239501953, 42.5445556640625, -42.53480529785156, -7.205879211425781, 1.421173095703125, 0.0, 33.576812744140625, 55.99571228027344, -8.186155319213867, -8.645378112792969, 38.46540832519531, 97.0406494140625, 15.708450317382812, 54.08837890625, 145.69955444335938, 37.827606201171875, 106.4232177734375, 89.29791259765625, -9.331146240234375, 54.460968017578125, 22.043182373046875, 74.85039520263672, 39.8443603515625, 51.402130126953125, -45.193695068359375, 20.5277099609375, 12.849143981933594, 51.01286315917969, -26.370208740234375, 24.914642333984375, -0.9663772583007812, -2.7841873168945312, -25.072418212890625, 136.25558471679688, 66.73485565185547, 39.17303466796875, 29.666976928710938, 9.285598754882812, 197.3524169921875, -7.92449951171875, 8.596054077148438, -10.268707275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000145.npy"}
{"epoch": 0.3036649214659686, "step": 146, "batch_size": 128, "mean": 33.87481689453125, "std": 58.054561614990234, "min": -134.91244506835938, "p10": -30.198899078369138, "median": 27.106319427490234, "p90": 103.48929138183594, "max": 213.50619506835938, "pos_frac": 0.75, "sample": [47.629234313964844, 7.4913330078125, 12.173942565917969, -23.919097900390625, 78.5076904296875, 28.566802978515625, -11.55029296875, 60.166168212890625, 188.36294555664062, 35.893218994140625, 19.876930236816406, 21.28997802734375, 102.86351013183594, 52.19146728515625, 7.4058380126953125, -24.549285888671875, 50.2467041015625, -22.845733642578125, -34.99713134765625, 30.347076416015625, -39.32080078125, 42.43096923828125, 20.140609741210938, 31.974945068359375, 1.201995849609375, 42.5545654296875, 5.392856597900391, -22.140869140625, 35.871559143066406, 147.62530517578125, 28.80145263671875, 85.24383544921875, 18.01708984375, 60.31695556640625, -25.079574584960938, 23.084243774414062, 86.6767578125, 37.68377685546875, 43.47265625, 17.89374542236328, 60.168853759765625, 50.805320739746094, 69.16290283203125, -48.34556579589844, 37.011409759521484, 24.88092041015625, 125.95083618164062, 127.484375, 10.356491088867188, -104.74726867675781, 85.26715087890625, 3.9574432373046875, -28.208709716796875, -1.6348876953125, 25.645835876464844, 30.632598876953125, 55.66070556640625, 10.260726928710938, -4.499237060546875, 29.298065185546875, -39.475494384765625, 98.4410400390625, 14.139862060546875, 83.13449096679688, 90.93854522705078, -79.779541015625, 143.78701782226562, 96.934326171875, 2.0736083984375, 97.76177978515625, 4.0015869140625, 213.50619506835938, -45.312408447265625, -11.738616943359375, 19.777442932128906, 103.67572021484375, -134.91244506835938, 68.6528091430664, 43.50071716308594, 115.60912322998047, 13.5478515625, 191.9055633544922, -38.2904052734375, 149.7623291015625, 94.55563354492188, -8.72991943359375, 100.98248291015625, 50.55415344238281, 0.0, 5.256721496582031, -46.23735046386719, -4.3169708251953125, -12.330230712890625, 18.632644653320312, 56.75189208984375, 89.78082275390625, 41.823333740234375, -28.928131103515625, 35.21258544921875, 14.767562866210938, 0.0, 114.86895751953125, -4.22283935546875, -9.44819450378418, 44.274070739746094, 29.035400390625, 14.936073303222656, 8.711536407470703, 7.84149169921875, 66.55850219726562, 18.311241149902344, 75.9686279296875, 68.50588989257812, 6.6984100341796875, -5.3924560546875, 64.99606323242188, 61.60630798339844, 166.7226104736328, 103.40939331054688, -100.912109375, 37.861053466796875, -55.83740234375, 21.1346435546875, 112.89166259765625, 10.578201293945312, -33.164024353027344, 24.93377685546875, 49.592071533203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000146.npy"}
{"epoch": 0.3057591623036649, "step": 147, "batch_size": 128, "mean": 26.08901596069336, "std": 61.2332763671875, "min": -169.9886474609375, "p10": -37.248478698730466, "median": 18.748600006103516, "p90": 106.3060821533203, "max": 207.91351318359375, "pos_frac": 0.671875, "sample": [9.328720092773438, -2.56854248046875, -15.78961181640625, 22.703018188476562, 14.529659271240234, 152.169677734375, 25.094329833984375, -56.557830810546875, -11.95068359375, 79.66287231445312, 73.7733154296875, -21.15949249267578, 6.217617034912109, 52.64976501464844, -36.13832092285156, 149.51174926757812, 127.62841796875, -15.685546875, 28.6728515625, 144.13140869140625, 52.80859375, 125.6854248046875, 2.877166748046875, -168.57553100585938, 63.63307189941406, -1.2350616455078125, 10.18426513671875, 93.39521026611328, -58.17835998535156, 21.285076141357422, 50.69349670410156, 62.43479919433594, -14.969917297363281, -27.654052734375, 150.2613525390625, -38.87640380859375, -21.928504943847656, -17.72076416015625, -16.052734375, 54.17718505859375, -9.820037841796875, 44.503173828125, 81.42218780517578, 36.778900146484375, 25.31903076171875, 50.65533447265625, 104.75445556640625, 10.70330810546875, 76.1300048828125, 10.578788757324219, -93.61239624023438, 9.14630126953125, -36.55079650878906, 14.043987274169922, 109.92654418945312, 11.802490234375, 74.36589050292969, 61.786224365234375, 5.884857177734375, 56.257080078125, 35.960662841796875, 77.31240844726562, 62.264923095703125, -34.191162109375, 0.0, 133.18328857421875, 9.4713134765625, 15.424705505371094, 26.97149658203125, 74.26368713378906, 50.84315490722656, 42.414283752441406, -7.334136962890625, 159.14088439941406, 15.093841552734375, 99.41061401367188, 207.91351318359375, 35.58808135986328, 55.169219970703125, 20.416412353515625, -26.2261962890625, -44.636260986328125, -66.74749755859375, -18.915252685546875, 1.599212646484375, -11.407596588134766, 27.705596923828125, -9.392242431640625, -51.877105712890625, 127.01731872558594, 54.3106689453125, 54.1129150390625, -9.481216430664062, 64.56875610351562, 102.09982299804688, 91.47073364257812, 23.975067138671875, 73.70707702636719, 7.7982177734375, 17.080787658691406, -34.315391540527344, 57.015625, -31.790283203125, 4.180450439453125, 40.010986328125, 6.54583740234375, 122.5550537109375, 21.437957763671875, 48.5086669921875, 75.27227783203125, 118.26353454589844, 21.342185974121094, 14.497217178344727, 26.890846252441406, 0.0, -0.0522308349609375, -38.8802490234375, -24.120346069335938, -95.07122039794922, 61.78271484375, -52.719390869140625, -99.8126220703125, -169.9886474609375, -1.9499588012695312, 85.43585205078125, 7.1047515869140625, -14.626523971557617, 13.253936767578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000147.npy"}
{"epoch": 0.3078534031413613, "step": 148, "batch_size": 128, "mean": 29.0433349609375, "std": 64.83516693115234, "min": -131.6356201171875, "p10": -42.411846160888665, "median": 18.025190353393555, "p90": 120.26630630493163, "max": 224.44189453125, "pos_frac": 0.6328125, "sample": [67.92669677734375, 35.346893310546875, 58.48860168457031, 1.2867431640625, -50.00054168701172, -131.6356201171875, -18.121734619140625, 20.001007080078125, 63.77490234375, 7.26068115234375, 126.71865844726562, 12.508247375488281, 36.21881103515625, 35.70172119140625, 15.754138946533203, 30.46002960205078, 0.0, 5.9673004150390625, 126.53948974609375, 25.523178100585938, -45.75181579589844, 9.289810180664062, -35.817604064941406, 98.09170532226562, 5.067222595214844, -0.3824462890625, 47.527740478515625, 10.61279296875, 63.717315673828125, 109.83737182617188, -92.89710998535156, -15.479560852050781, -4.046546936035156, 65.30195617675781, 0.08915328979492188, 47.231964111328125, 224.44189453125, -29.51776123046875, -104.98822021484375, 72.99905395507812, -71.118408203125, 141.55889892578125, -85.64517211914062, -1.1652984619140625, 15.300947189331055, 95.83837890625, -32.74188232421875, 181.49594116210938, 64.65576171875, 47.7508544921875, 7.452121734619141, -25.285316467285156, -34.44416809082031, 65.26731872558594, 128.52740478515625, 38.265228271484375, 160.079833984375, 54.83354187011719, 3.3546104431152344, 27.773895263671875, -57.775146484375, 20.962066650390625, 123.28875732421875, 0.0, 109.4185791015625, 44.64720153808594, 7.701717376708984, 60.00813293457031, 215.20855712890625, 1.2560844421386719, -53.037017822265625, -0.513214111328125, -26.47320556640625, -56.8856201171875, 42.54978942871094, -28.451019287109375, 50.27473449707031, 66.24369812011719, 118.9709701538086, 205.5894775390625, -20.462860107421875, -54.95037841796875, 67.36323547363281, -2.78839111328125, -8.0953369140625, 1.3043060302734375, 16.049373626708984, -22.814483642578125, -21.980010986328125, -62.119354248046875, 38.83167266845703, 20.153091430664062, 5.5946044921875, 28.63604736328125, 77.38168334960938, -8.226642608642578, -75.83380126953125, 137.35336303710938, 25.021697998046875, 104.72224426269531, -11.636917114257812, -15.661348342895508, -40.980430603027344, -9.723724365234375, 30.025970458984375, -20.03356170654297, 38.55230712890625, 38.55860900878906, 67.29263305664062, 89.3019027709961, -1.281646728515625, -2.724212646484375, -35.2169189453125, 74.6004638671875, -16.340415954589844, 103.8131103515625, 94.14453125, 61.83404541015625, 69.49278259277344, 88.34827423095703, -2.2951221466064453, -7.0356903076171875, 104.259033203125, 39.99455261230469, -36.82421875, 137.47235107421875, 126.79067993164062, -12.105384826660156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000148.npy"}
{"epoch": 0.3099476439790576, "step": 149, "batch_size": 128, "mean": 37.296199798583984, "std": 55.14160919189453, "min": -87.64859008789062, "p10": -28.06291809082031, "median": 34.019081115722656, "p90": 109.09493408203124, "max": 186.98895263671875, "pos_frac": 0.734375, "sample": [96.595703125, -26.23785400390625, 43.18487548828125, 70.01570892333984, 18.380279541015625, 14.605804443359375, 0.5498828887939453, -6.4558563232421875, 60.2894287109375, 4.1885833740234375, -31.9862060546875, -37.34656524658203, 81.25689697265625, 46.06201171875, -0.6318740844726562, -5.48565673828125, -6.254425048828125, 30.05877685546875, 37.44316101074219, -27.610565185546875, 107.9931640625, 33.59686279296875, 111.17414855957031, 59.72969055175781, 71.43594360351562, 9.194915771484375, 128.075927734375, 109.770751953125, 62.74658203125, 41.47166442871094, -4.45050048828125, 41.136444091796875, 127.29779052734375, 89.18798828125, 27.319534301757812, 3.3922805786132812, 108.8052978515625, -12.259634017944336, 9.13116455078125, 4.40533447265625, 81.40457153320312, 36.57447814941406, 82.46221923828125, 34.44129943847656, -9.3046875, 49.239990234375, -37.64277648925781, 108.541015625, 7.2901611328125, 18.365013122558594, 47.891387939453125, 103.66192626953125, -17.04693603515625, 16.802749633789062, 74.38932037353516, -29.118408203125, 76.547119140625, 55.5614013671875, 125.08978271484375, 8.8084716796875, 110.02937316894531, 160.91798400878906, -65.34024047851562, -76.56585693359375, -0.31878662109375, 18.434829711914062, 88.56817626953125, -8.7822265625, 65.30691528320312, 164.58251953125, 10.058380126953125, -21.59758758544922, 102.67729949951172, -22.84759521484375, -60.911094665527344, 39.6767578125, -41.7989501953125, 35.6793212890625, 86.40879821777344, -4.6182861328125, 23.23617935180664, 33.03904724121094, 67.63104248046875, 48.97776794433594, 186.98895263671875, 57.849609375, -47.14215087890625, 18.203048706054688, -87.64859008789062, 16.989471435546875, 4.370758056640625, 90.42393493652344, 32.61753845214844, 63.079498291015625, -20.678497314453125, 149.41680908203125, 92.62957763671875, 29.519134521484375, 40.20513916015625, 10.092483520507812, 67.30995178222656, 75.1732177734375, 47.55943298339844, -41.971954345703125, 90.866455078125, -0.9596710205078125, 55.32090759277344, 44.79840087890625, 16.303295135498047, 70.95558166503906, -61.9755859375, 143.0762939453125, 92.86041259765625, 63.520042419433594, 171.52386474609375, -19.80768585205078, 30.14508056640625, 123.2982177734375, 45.2637939453125, -39.11004638671875, -7.9060821533203125, -19.30029296875, 0.2294921875, -20.61663818359375, 79.20109558105469, 7.3138885498046875, 0.646881103515625, 47.098968505859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000149.npy"}
{"epoch": 0.31204188481675393, "step": 150, "batch_size": 128, "mean": 30.875411987304688, "std": 58.08030700683594, "min": -111.67633056640625, "p10": -43.52482147216796, "median": 26.765403747558594, "p90": 107.64606094360349, "max": 185.88558959960938, "pos_frac": 0.7109375, "sample": [16.113739013671875, 36.04002380371094, -7.443813323974609, 125.3023681640625, 38.68634033203125, 40.060333251953125, 45.128265380859375, 125.97430419921875, 6.766876220703125, 66.29137420654297, 10.27203369140625, 105.0914306640625, 83.58358764648438, 96.4522705078125, 24.859289169311523, 177.10934448242188, 82.8504638671875, 42.72657775878906, -15.887283325195312, 13.120819091796875, 36.707794189453125, -7.4918212890625, -49.631988525390625, -3.164997100830078, 44.49720764160156, -77.16696166992188, -22.486801147460938, -23.034988403320312, 14.444900512695312, 69.05062866210938, 23.249069213867188, 11.669723510742188, 66.51708984375, -78.35073852539062, 24.207015991210938, 29.240234375, 102.09075927734375, -101.08709716796875, 79.98954772949219, 62.37358093261719, 74.06634521484375, -18.934600830078125, 26.8709716796875, 23.400421142578125, 36.7589111328125, 56.449127197265625, 113.88140869140625, 63.05181884765625, 70.51426696777344, 73.11624145507812, 37.658477783203125, 93.86993408203125, 12.273651123046875, 147.42874145507812, 1.95703125, 57.777496337890625, -1.9782791137695312, 61.1370849609375, -110.38482666015625, 185.88558959960938, 48.276397705078125, -58.62562561035156, 79.7637939453125, 47.080657958984375, -57.921905517578125, 129.40109252929688, 60.157989501953125, 98.30364990234375, 36.593017578125, 87.17369079589844, 11.7589111328125, 99.87913513183594, 7.234498977661133, -0.150543212890625, -49.6209716796875, -24.554153442382812, 41.7120361328125, 20.531248092651367, -78.4376220703125, 3.097991943359375, -0.894744873046875, -2.519622802734375, 39.713584899902344, -40.91218566894531, 11.072595596313477, 11.521392822265625, 32.7618408203125, 117.46673583984375, 44.798553466796875, 118.74346923828125, 27.011962890625, -27.098388671875, 0.0, 60.433502197265625, 92.77288818359375, -21.68658447265625, 77.4793701171875, -0.36444091796875, -52.015594482421875, 1.6962890625, -1.3033065795898438, -61.847373962402344, -37.37696075439453, 6.48712158203125, 94.06607055664062, 45.99894714355469, 99.18890380859375, 2.66253662109375, -4.728179931640625, -13.467952728271484, -111.67633056640625, 127.88448333740234, -37.3031005859375, 116.82675170898438, 26.659835815429688, 15.890419006347656, 123.87786865234375, 79.45272827148438, 23.66131591796875, 65.88360595703125, 16.645111083984375, 50.97460174560547, -77.93405151367188, 1.5349884033203125, -7.459262847900391, 12.166748046875, 113.60686492919922, -29.4739990234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000150.npy"}
{"epoch": 0.31413612565445026, "step": 151, "batch_size": 128, "mean": 46.719383239746094, "std": 62.01521682739258, "min": -124.10214233398438, "p10": -31.69053802490234, "median": 44.510162353515625, "p90": 124.4008834838867, "max": 210.86102294921875, "pos_frac": 0.7421875, "sample": [44.2730712890625, -22.36444091796875, 3.730449676513672, -48.808258056640625, -31.565673828125, 30.87671661376953, 73.51148223876953, 36.00962829589844, 27.19134521484375, 88.76286315917969, 84.71463012695312, 57.42346954345703, 63.83488464355469, 181.36053466796875, 89.64727783203125, -35.890289306640625, 75.75369262695312, 79.43264770507812, -5.556121826171875, 29.7344970703125, 107.22930908203125, 108.75173950195312, -31.981887817382812, 142.09796142578125, -63.154541015625, -21.524139404296875, 86.7762451171875, 66.93405151367188, -14.439300537109375, 6.642864227294922, 167.696533203125, 4.76910400390625, 5.152824401855469, 136.986328125, 106.40672302246094, -27.351821899414062, 44.74725341796875, 71.54180908203125, -13.59510612487793, 210.86102294921875, -70.07572937011719, 52.894805908203125, 24.306915283203125, 38.099586486816406, 4.847295761108398, -36.10005187988281, 64.27033996582031, 80.74267578125, 79.2232666015625, 65.09039306640625, -124.10214233398438, -20.67266845703125, -58.005615234375, 48.00444030761719, 37.26336669921875, 62.93534851074219, 163.21279907226562, 6.5315704345703125, 98.36140441894531, -1.9811534881591797, 165.56524658203125, 58.955230712890625, 188.7001953125, 94.69134521484375, 57.87220764160156, 45.8118896484375, -7.52508544921875, 101.70948028564453, 64.05609130859375, 87.01750946044922, 34.484039306640625, -11.34228515625, 32.043609619140625, 78.01995849609375, 43.04901123046875, -20.089271545410156, 30.1207275390625, 22.087921142578125, 28.10894775390625, 43.1392822265625, 91.52777099609375, 75.76409912109375, 42.08985900878906, 102.32701110839844, 161.09371948242188, 67.84332275390625, 19.179901123046875, 32.93065643310547, 0.0, 11.6827392578125, 79.91311645507812, 50.30303955078125, 91.7266845703125, -2.995574951171875, -7.2711181640625, 14.147674560546875, 55.359107971191406, 32.955230712890625, -48.2960205078125, -45.86360168457031, 161.33694458007812, -19.583328247070312, 105.86164855957031, -39.43853759765625, 121.86152648925781, 130.3260498046875, 79.08917236328125, 13.338455200195312, -5.620697021484375, 163.9835205078125, 37.687408447265625, -30.208999633789062, 98.24612426757812, 72.63536834716797, 156.8387451171875, -41.3646240234375, 78.06791687011719, 46.25274658203125, 38.665863037109375, 96.796875, 113.86869049072266, -8.43267822265625, 121.276611328125, 65.62776184082031, 71.80284118652344, -20.2110595703125, 39.78118896484375, -60.76405334472656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000151.npy"}
{"epoch": 0.3162303664921466, "step": 152, "batch_size": 128, "mean": 38.818275451660156, "std": 70.17227172851562, "min": -106.21710205078125, "p10": -41.30230255126952, "median": 30.300323486328125, "p90": 125.88880310058593, "max": 264.38909912109375, "pos_frac": 0.6953125, "sample": [169.237060546875, 210.43911743164062, 104.17622375488281, 41.57164764404297, 119.30047607421875, 104.5885009765625, 109.23544311523438, 23.530227661132812, 29.79107666015625, 127.37136840820312, 33.516998291015625, -23.5958251953125, -15.374605178833008, 0.95013427734375, 7.7952728271484375, 3.0813446044921875, 170.712646484375, -18.0240478515625, 26.823272705078125, 0.807647705078125, 31.171875, 15.59442138671875, 18.6898193359375, -20.36309814453125, 122.91354370117188, -72.37230682373047, -1.3541240692138672, -18.480987548828125, -90.50701904296875, 50.31451416015625, -27.125106811523438, -96.45707702636719, 34.577362060546875, 49.55108642578125, 21.727569580078125, 43.25103759765625, 79.80587768554688, -0.76922607421875, 112.35488891601562, -6.538902282714844, -55.373321533203125, 43.673309326171875, -81.1395263671875, -24.148529052734375, 51.40101623535156, -75.13385009765625, -9.055427551269531, -10.238616943359375, 50.54387664794922, 16.58917236328125, 8.684322357177734, -38.7012939453125, 53.9210205078125, 121.206298828125, 106.666015625, 71.45904541015625, -20.34857177734375, -5.500274658203125, 81.88482666015625, 51.20050048828125, 46.037574768066406, 50.685638427734375, 61.033905029296875, 113.47355651855469, 144.59332275390625, 16.693084716796875, 98.69271850585938, 130.55908203125, -5.61181640625, 218.26373291015625, -98.689697265625, -20.5728759765625, 4.135345458984375, 85.3446044921875, 27.373748779296875, 8.609901428222656, 30.8095703125, -59.426513671875, -2.0386734008789062, 11.601882934570312, -5.534793853759766, 28.573638916015625, 6.9908599853515625, 92.65167236328125, -50.07275390625, 125.25341796875, 86.47105407714844, 10.269384384155273, 137.51422119140625, -5.234477996826172, 38.34712219238281, -106.21710205078125, 90.41116333007812, 33.7197265625, 110.78018188476562, 66.64693450927734, 131.26962280273438, -33.458251953125, 80.949462890625, -20.799545288085938, 59.747955322265625, 3.314229965209961, -60.13783264160156, 83.62680053710938, 242.3731689453125, 4.0972900390625, -2.4311256408691406, 71.31719970703125, 264.38909912109375, -47.37132263183594, -75.40461730957031, 53.07073974609375, 50.743675231933594, 96.28802490234375, 161.7730712890625, 61.32765197753906, -29.3988037109375, -13.100761413574219, 96.39827728271484, 44.677703857421875, -13.7034912109375, 122.30448913574219, 136.57823181152344, 59.69055938720703, 19.55126953125, 24.36858367919922, 0.9879302978515625, 64.05368041992188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000152.npy"}
{"epoch": 0.3183246073298429, "step": 153, "batch_size": 128, "mean": 55.08311462402344, "std": 65.16935729980469, "min": -98.64404296875, "p10": -10.733506774902343, "median": 44.072052001953125, "p90": 143.04004821777343, "max": 268.1402587890625, "pos_frac": 0.796875, "sample": [95.02032470703125, 4.073860168457031, 128.80484008789062, 15.10772705078125, 116.439697265625, -10.364471435546875, 30.712127685546875, 20.030609130859375, 4.8109130859375, 9.927093505859375, 16.348419189453125, 144.82688903808594, 7.89654541015625, 28.306991577148438, 123.82916259765625, 63.08526611328125, 42.598236083984375, 60.658050537109375, -21.1109619140625, -6.261993408203125, 3.175935745239258, -1.2563934326171875, 21.50531005859375, -1.05242919921875, 91.07203674316406, -10.677642822265625, 125.91268920898438, 143.39639282226562, 21.17041015625, 47.24029541015625, -8.560934066772461, 145.73095703125, 99.80319213867188, 138.1627655029297, 142.8873291015625, 3.367523193359375, 25.337646484375, 94.08236694335938, 7.308286666870117, 0.0, 145.12918090820312, 83.24081420898438, 89.2123031616211, 8.702041625976562, -98.64404296875, -13.140266418457031, -34.526275634765625, 233.8790283203125, 70.390380859375, 118.14016723632812, -10.352951049804688, 78.07308959960938, 41.72576904296875, 89.89764404296875, 164.8588409423828, 12.330940246582031, 78.06330871582031, 43.4822998046875, 2.5969009399414062, 0.346710205078125, 30.04718780517578, 268.1402587890625, 135.3270263671875, -6.5814666748046875, 117.60287475585938, 12.804718017578125, 90.23904418945312, 2.7332191467285156, 147.309326171875, 69.45144653320312, 174.383544921875, 62.40397644042969, 57.488037109375, -9.196456909179688, 44.66180419921875, -10.521589279174805, 83.25363159179688, 99.0701904296875, -22.299545288085938, 103.86483764648438, 26.355117797851562, 65.87991333007812, 17.988983154296875, 15.623920440673828, -13.296356201171875, -14.06561279296875, 73.24606323242188, 107.86505126953125, 75.41341400146484, 2.642822265625, 19.799224853515625, 72.30618286132812, -30.47637939453125, 72.49873352050781, 121.69000244140625, 66.71154022216797, 10.9373779296875, 49.42286682128906, 79.8893051147461, 20.606773376464844, 98.7506103515625, 4.466766357421875, 85.83120727539062, -16.168487548828125, -0.09447479248046875, 74.70428466796875, -46.1163330078125, 58.41693115234375, 28.60101318359375, 10.912200927734375, -10.734756469726562, 179.7469482421875, 3.4578857421875, -10.73297119140625, 83.89846801757812, 119.65899658203125, 40.06248474121094, 238.7239990234375, 154.64907836914062, 39.58154296875, 138.09617614746094, 133.65390014648438, 191.77886962890625, 56.6236572265625, -15.32708740234375, -97.5559310913086, 100.42495727539062, 47.356689453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000153.npy"}
{"epoch": 0.3204188481675393, "step": 154, "batch_size": 128, "mean": 43.34696578979492, "std": 77.98604583740234, "min": -173.86846923828125, "p10": -51.29969177246093, "median": 38.73490524291992, "p90": 132.79850769042966, "max": 384.65478515625, "pos_frac": 0.75, "sample": [167.90074157714844, 114.94209289550781, 12.84429931640625, 247.446533203125, 97.9569091796875, 164.81942749023438, 21.3489990234375, 55.930030822753906, 66.41372680664062, -98.94955444335938, 384.65478515625, -19.07928466796875, 36.94573974609375, 28.434906005859375, 17.837020874023438, -1.0782546997070312, 40.40138244628906, -173.86846923828125, -7.133720397949219, 5.392333984375, 48.420440673828125, -39.145599365234375, 113.87274169921875, 9.466278076171875, -49.28680419921875, 5.341560363769531, -19.386741638183594, 8.580963134765625, 44.74159240722656, 61.85121154785156, 23.578460693359375, -2.999845504760742, 61.6336669921875, 28.046470642089844, 8.86184310913086, 31.96282958984375, 77.7972412109375, 128.2647705078125, 77.12680053710938, 122.9000244140625, 82.59962463378906, 25.943260192871094, -1.8311767578125, -5.019989013671875, 28.526611328125, -27.429779052734375, 45.3065185546875, 62.161895751953125, -21.338211059570312, 25.272674560546875, 31.17564582824707, -55.996429443359375, 136.81472778320312, 174.66024780273438, -24.9246826171875, -119.5635986328125, 42.96685791015625, -94.579345703125, 61.016265869140625, 25.123779296875, 175.77682495117188, 112.9542236328125, 2.5530128479003906, 58.50921630859375, 14.787399291992188, 59.580322265625, -75.03117370605469, 122.29251098632812, 121.66256713867188, 125.06488037109375, 122.55401611328125, 52.77836608886719, 17.79437255859375, 7.338409423828125, 26.74212646484375, -8.151779174804688, 154.85647583007812, -58.32142639160156, -4.44500732421875, -83.4783935546875, 138.50759887695312, 110.02291870117188, 55.19110107421875, -97.59029388427734, 105.96272277832031, 80.9677734375, 24.022598266601562, 47.9925537109375, 42.572662353515625, -4.675262451171875, 61.795654296875, -73.73619079589844, -98.23321533203125, -40.413612365722656, 120.65127563476562, -32.461578369140625, 40.34333038330078, 69.7139892578125, 131.0772705078125, -128.299072265625, -62.953887939453125, 190.637939453125, 137.21368408203125, 21.194557189941406, 56.53521728515625, 180.89047241210938, 83.74435424804688, -11.081642150878906, -13.992382049560547, 37.12648010253906, 44.31736755371094, 110.41471862792969, 11.642242431640625, 0.9576416015625, 56.186737060546875, 41.717041015625, 70.3721923828125, 81.36495971679688, 170.9974365234375, 77.4419937133789, 119.642333984375, 36.285484313964844, 63.01409912109375, 25.849761962890625, 31.980854034423828, 26.4241943359375, 74.13823699951172, 91.4476547241211], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000154.npy"}
{"epoch": 0.3225130890052356, "step": 155, "batch_size": 128, "mean": 37.09147262573242, "std": 73.06877136230469, "min": -181.20416259765625, "p10": -36.21214752197265, "median": 27.657328605651855, "p90": 117.69642028808592, "max": 351.27349853515625, "pos_frac": 0.7109375, "sample": [105.13729858398438, -12.876007080078125, 10.65875244140625, 8.116195678710938, 92.97102355957031, 78.64793395996094, 15.503997802734375, -181.20416259765625, -60.3773193359375, 133.05657958984375, 103.5118408203125, -9.80059814453125, 7.821723937988281, 21.730297088623047, -12.80038070678711, -19.30126953125, -29.626693725585938, -5.927675247192383, 144.28671264648438, 163.03631591796875, 86.50450134277344, 114.91705322265625, 22.2799072265625, 50.915618896484375, 56.15904235839844, 124.18161010742188, -64.5213623046875, -12.7099609375, 72.2335205078125, -6.372467041015625, 80.29962158203125, -21.095458984375, 78.45201110839844, 7.791477203369141, 41.95884704589844, 11.15191650390625, -91.24700927734375, 1.0081634521484375, -50.625732421875, -16.595001220703125, 6.469818115234375, 176.0508575439453, 89.50798034667969, 156.442138671875, 7.195747375488281, -24.243927001953125, 11.280197143554688, -34.613525390625, 37.76990509033203, 13.129844665527344, 14.79461669921875, -48.601959228515625, -25.621124267578125, 35.18571472167969, -54.20365905761719, 26.44472312927246, 14.709991455078125, 30.185195922851562, 30.881683349609375, -19.383056640625, 80.74726867675781, 85.24974060058594, 30.30987548828125, 279.511962890625, 41.816375732421875, 95.49359130859375, 129.2302703857422, 7.640018463134766, -15.19586181640625, 39.671630859375, 12.7645263671875, 98.87785339355469, 19.169845581054688, 99.13848876953125, -47.7425537109375, 0.0, 29.4185791015625, 36.0789794921875, 182.04122924804688, 186.80282592773438, 100.46426391601562, 24.4849853515625, -27.185165405273438, 77.6754150390625, 41.55670166015625, 97.3402099609375, 15.194549560546875, -10.025352478027344, -4.02191162109375, 64.43365478515625, 33.56475830078125, 3.8455963134765625, -48.221710205078125, 41.73356628417969, -31.664024353027344, 85.51077270507812, 54.115478515625, 37.0284423828125, 28.86993408203125, 109.096435546875, 138.927490234375, 86.33682250976562, 69.74221801757812, 65.85055541992188, -38.508392333984375, -13.320953369140625, -7.721508026123047, 250.99969482421875, 3.4870567321777344, -35.22804260253906, 96.88311767578125, 3.3885650634765625, 22.322479248046875, 6.423957824707031, 351.27349853515625, -71.12567138671875, -109.569580078125, 17.118263244628906, -76.07379150390625, 76.96641540527344, 45.62591552734375, 33.76416015625, -7.559356689453125, 39.929351806640625, 30.164093017578125, 65.0093994140625, 51.932220458984375, 85.21728515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000155.npy"}
{"epoch": 0.32460732984293195, "step": 156, "batch_size": 128, "mean": 52.62311553955078, "std": 83.71963500976562, "min": -174.5746307373047, "p10": -31.11628723144531, "median": 42.0711669921875, "p90": 158.17886962890626, "max": 288.66033935546875, "pos_frac": 0.765625, "sample": [128.41921997070312, 125.2623291015625, 12.03411865234375, 24.92926025390625, 99.4908447265625, 10.866546630859375, 7.761804580688477, -1.56689453125, -113.56202697753906, 52.406585693359375, 75.33483123779297, 45.37413024902344, 85.94548034667969, 71.2142333984375, 10.090435028076172, 106.72648620605469, 7.850788116455078, 125.350341796875, 129.2528076171875, 110.34774780273438, -172.02169799804688, 9.088150024414062, 77.7750244140625, 20.16736602783203, 158.319091796875, 152.6546630859375, 165.8890380859375, -17.001983642578125, 158.1187744140625, 93.08209228515625, -151.0582275390625, -30.348388671875, 6.858489990234375, -6.3952178955078125, 100.37643432617188, 59.22059631347656, -20.66845703125, -42.75193786621094, 91.50140380859375, 4.5718841552734375, -10.730499267578125, 75.38055419921875, -8.3580322265625, 19.198631286621094, 43.12103271484375, 224.63473510742188, 184.77835083007812, -10.89324951171875, 128.76559448242188, 37.802947998046875, 16.14105224609375, 37.4510498046875, 100.42852783203125, 23.96734619140625, -22.88153076171875, -16.3228759765625, -4.145172119140625, 105.95519256591797, 7.66331672668457, 110.22750854492188, 3.976837158203125, 112.0118408203125, 99.260986328125, 177.11614990234375, 108.37762451171875, 53.633995056152344, 98.3650894165039, 40.56524658203125, 17.41059112548828, 87.53396606445312, 68.85629272460938, 26.029052734375, 1.090087890625, 2.144195556640625, -3.9486083984375, 36.135719299316406, -64.14633178710938, 217.30615234375, -52.944061279296875, 20.305580139160156, -19.50037384033203, 84.15850830078125, 98.1761474609375, 58.89801025390625, -156.35153198242188, 195.28009033203125, 11.719926834106445, 187.9107666015625, 50.79899597167969, 44.93717956542969, 41.02130126953125, 138.09060668945312, 21.237140655517578, 108.24342346191406, 126.3593521118164, 145.608154296875, 72.27032470703125, 69.95468139648438, 30.94654083251953, -174.5746307373047, -66.31878662109375, 85.49267578125, 109.4931411743164, -32.908050537109375, -24.703750610351562, 141.6439208984375, 145.5972900390625, 2.40087890625, -55.647216796875, 232.31927490234375, 26.15399169921875, 111.72171020507812, 228.744140625, 24.766021728515625, 1.1741199493408203, -11.427148818969727, -10.766220092773438, -86.21688842773438, 56.90191650390625, 77.11203002929688, 152.22433471679688, 197.8563232421875, 181.9805908203125, 32.4979248046875, -25.719985961914062, 288.66033935546875, -62.14167022705078, 17.512481689453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000156.npy"}
{"epoch": 0.3267015706806283, "step": 157, "batch_size": 128, "mean": 40.14427185058594, "std": 72.23072814941406, "min": -166.73138427734375, "p10": -45.610568237304676, "median": 31.843124389648438, "p90": 133.11300811767578, "max": 234.319091796875, "pos_frac": 0.7109375, "sample": [117.1322021484375, 13.217414855957031, 148.76101684570312, 135.5196990966797, 15.0677490234375, -17.5875244140625, 52.01588439941406, 224.7388916015625, 1.7712020874023438, -36.248382568359375, 12.570775985717773, -103.28973388671875, -20.945892333984375, 0.0, 35.07749938964844, 68.16290283203125, 48.76410675048828, -13.57241439819336, 132.85223388671875, -6.540069580078125, 77.21280670166016, 83.81210327148438, 172.12750244140625, 74.69253540039062, 52.5882568359375, 87.4189453125, -8.062995910644531, -43.20054626464844, 18.135818481445312, 119.78840637207031, 116.19776916503906, 60.19122314453125, 70.45437622070312, -31.12804412841797, 26.85064697265625, 111.30349731445312, 107.1436767578125, 18.20819091796875, 15.131759643554688, -103.27923583984375, -28.343368530273438, 157.84423828125, 38.98554992675781, -56.03466796875, 11.80609130859375, -50.775054931640625, 152.5941162109375, 23.99920654296875, -90.52203369140625, 69.17141723632812, 32.08612060546875, 34.978485107421875, 61.097015380859375, -98.76631164550781, 96.26083374023438, 22.313207626342773, 15.351089477539062, 112.81532287597656, -6.479766845703125, 87.1629638671875, 13.298980712890625, 13.090152740478516, -166.73138427734375, 131.46612548828125, 97.7066650390625, 60.150177001953125, -10.875030517578125, -28.296096801757812, 234.319091796875, 38.3941650390625, 134.02261352539062, -8.73187255859375, 150.49534606933594, -23.476255416870117, -56.5562744140625, 31.600128173828125, 93.39503479003906, -7.841033935546875, 11.410247802734375, -10.445915222167969, -43.397216796875, -135.57974243164062, -14.597442626953125, 9.352783203125, 27.666290283203125, 25.183547973632812, 93.52924346923828, 64.35140991210938, 101.76382446289062, 0.0, -54.09320068359375, 197.58233642578125, -71.6746826171875, 24.810165405273438, 4.04864501953125, 83.53646850585938, -72.2039794921875, 6.7092742919921875, 13.6209716796875, 38.74542236328125, 154.60031127929688, 21.0, 50.029205322265625, 26.90375518798828, 36.200225830078125, 121.35174560546875, -4.036834716796875, -53.7822265625, 86.94671630859375, 63.37640380859375, 133.7214813232422, 42.9918212890625, -23.2052001953125, 32.7528076171875, 83.78651428222656, 108.83615112304688, 107.17010498046875, -14.3350830078125, -25.145584106445312, 69.25259399414062, 124.78958129882812, 93.95933532714844, 41.88179016113281, 22.5966796875, 24.203338623046875, 121.37147521972656, 184.03094482421875, 92.87092590332031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000157.npy"}
{"epoch": 0.3287958115183246, "step": 158, "batch_size": 128, "mean": 46.491737365722656, "std": 73.80120086669922, "min": -250.9534912109375, "p10": -30.89037322998047, "median": 41.22206115722656, "p90": 138.78583679199218, "max": 258.71832275390625, "pos_frac": 0.75, "sample": [23.06791114807129, 68.11180877685547, 39.81719970703125, 88.1348876953125, 59.04669189453125, 102.40652465820312, 25.23907470703125, 109.26725006103516, 78.20205688476562, 80.1305923461914, 74.65208435058594, 135.144287109375, 24.913055419921875, 9.36676025390625, 68.71273803710938, -5.559030532836914, 130.392578125, 15.836700439453125, -14.518318176269531, 0.0, 126.23733520507812, 12.370674133300781, 9.525146484375, 14.145462036132812, 210.61001586914062, 4.342437744140625, -89.07928466796875, 110.2618408203125, 96.07135772705078, 34.15052795410156, 72.47726440429688, 59.51524353027344, -28.9066162109375, 36.48065185546875, 0.9297027587890625, -31.6290283203125, 1.085479736328125, -61.801597595214844, 59.71875762939453, -75.73190307617188, 26.119483947753906, 94.21249389648438, 149.552001953125, 3.1354293823242188, -11.551445007324219, 11.392425537109375, 16.236244201660156, 164.58523559570312, 37.84082794189453, 42.626922607421875, 69.67062377929688, 5.7752685546875, 152.8546905517578, 79.68829345703125, 60.930755615234375, 128.7230224609375, 1.8417510986328125, 258.71832275390625, 116.16799926757812, 2.6314315795898438, 115.9683837890625, -0.7612457275390625, -7.5847015380859375, -46.364013671875, -107.58847045898438, 93.64044189453125, -5.370513916015625, 24.22479248046875, 85.43289184570312, 38.915771484375, 53.640384674072266, -10.701549530029297, 48.403709411621094, 165.71292114257812, 115.38900756835938, -250.9534912109375, -62.919334411621094, -22.9510498046875, 138.22625732421875, 48.5091552734375, -20.972076416015625, 53.0986328125, 112.11376953125, 95.13946533203125, 134.51405334472656, 74.4398193359375, 54.060272216796875, 117.92796325683594, 85.28439331054688, 201.40966796875, 75.92196655273438, 140.09152221679688, 100.66119384765625, 143.63446044921875, -41.290283203125, 56.80352783203125, -57.4434814453125, -97.57247924804688, 172.32611083984375, 31.82635498046875, -19.80303955078125, -9.686660766601562, -36.581932067871094, -10.702789306640625, 79.87783813476562, 39.04478454589844, -28.532073974609375, 108.45704650878906, 190.5355224609375, 54.64323425292969, -6.919189453125, 59.416778564453125, -30.573806762695312, 34.545860290527344, -95.26837158203125, -21.44839859008789, 94.52606201171875, -27.461135864257812, 37.1806640625, 18.844818115234375, 70.5028076171875, 28.076507568359375, 196.52325439453125, 62.923583984375, 143.64279174804688, 35.3497314453125, 45.4277229309082, 9.266372680664062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000158.npy"}
{"epoch": 0.3308900523560209, "step": 159, "batch_size": 128, "mean": 28.29042625427246, "std": 79.55248260498047, "min": -176.59210205078125, "p10": -65.19134368896485, "median": 23.683441162109375, "p90": 116.19770431518552, "max": 385.6446533203125, "pos_frac": 0.6796875, "sample": [-11.684722900390625, 30.734100341796875, 58.7823486328125, 19.5609130859375, 66.0692138671875, -6.3910980224609375, 40.992034912109375, -73.72381591796875, 32.27275848388672, 21.795974731445312, -47.9591064453125, 113.3450698852539, -108.52047729492188, -34.043853759765625, 23.09027099609375, -62.950042724609375, 89.59228515625, 90.13812255859375, 30.5679931640625, 28.64630126953125, 48.598876953125, 86.6170654296875, -13.40310287475586, 11.239105224609375, 98.37533569335938, -72.50271606445312, 87.02880859375, 145.01412963867188, 122.85385131835938, -60.1072998046875, 154.4725341796875, -51.2232666015625, 5.906059265136719, -70.64608764648438, -62.062530517578125, 111.51629638671875, 71.12286376953125, -20.245849609375, -5.433910369873047, -61.7813720703125, 34.27281188964844, 14.492324829101562, -171.49285888671875, -65.49723052978516, 78.51205444335938, -5.170501708984375, 77.85491943359375, 91.14794921875, 42.41168212890625, 26.970321655273438, 10.40594482421875, -0.56707763671875, 47.61638641357422, -91.82106018066406, -176.59210205078125, 57.6351318359375, 165.3472442626953, 14.325580596923828, -17.935409545898438, -40.79481506347656, 73.620361328125, 106.76089477539062, 111.67269897460938, 145.891845703125, 33.5162353515625, 97.88824462890625, -81.04060363769531, 26.19812774658203, 37.96018981933594, 6.59649658203125, 44.90000915527344, 87.89152526855469, 46.39410400390625, 143.68606567382812, -1.1689071655273438, 79.80892944335938, 3.763214111328125, 47.53193664550781, 1.497894287109375, 4.547882080078125, -38.02609634399414, 137.83987426757812, 24.276611328125, -85.83987426757812, 235.9940185546875, 204.38934326171875, 83.04824829101562, -65.06024932861328, 57.75294494628906, 158.70254516601562, 71.22396087646484, 47.33819580078125, 0.851318359375, 93.56681060791016, -35.905853271484375, 98.95071411132812, -162.4027099609375, -51.66106414794922, 16.52508544921875, 385.6446533203125, -9.7681884765625, 58.00048828125, -6.7849884033203125, -8.454313278198242, 38.09075927734375, 20.736465454101562, 2.6219844818115234, 21.463409423828125, 150.22764587402344, 72.15473937988281, 17.606826782226562, 21.96002197265625, -62.57411193847656, 19.217041015625, -72.23320007324219, 18.82720947265625, 141.5419921875, 17.763999938964844, -112.17047119140625, -47.612548828125, -57.500732421875, 67.80795288085938, 52.70320129394531, 34.197662353515625, 62.821815490722656, 5.70977783203125, -1.90771484375, 64.82787322998047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000159.npy"}
{"epoch": 0.33298429319371725, "step": 160, "batch_size": 128, "mean": 50.908164978027344, "std": 61.36040496826172, "min": -72.69879150390625, "p10": -17.75203285217285, "median": 53.5888671875, "p90": 120.25596923828124, "max": 240.29071044921875, "pos_frac": 0.765625, "sample": [27.588539123535156, 70.64717102050781, 0.0, 126.66592407226562, -50.5799560546875, 212.884033203125, 64.96853637695312, 3.3705101013183594, 163.30184936523438, 49.40534973144531, 41.5997314453125, 20.36370849609375, -56.635101318359375, 70.64739990234375, 58.77008056640625, 50.37762451171875, 3.05828857421875, 106.55538940429688, 87.20831298828125, 128.0988311767578, 3.7043304443359375, -5.32086181640625, 86.19329833984375, 36.274261474609375, -9.612060546875, -27.671417236328125, 104.17886352539062, -3.3301239013671875, -14.31927490234375, 90.13223266601562, 24.218597412109375, 71.77674102783203, 14.178619384765625, 90.25967407226562, 59.142120361328125, 71.8470458984375, 106.30379486083984, -17.03097152709961, 107.94575500488281, -4.41607666015625, 58.044525146484375, 57.72564697265625, 92.53062438964844, 95.00627136230469, 237.89227294921875, 105.46197509765625, -16.00140380859375, 27.203948974609375, 16.860769271850586, -48.2984619140625, 92.06771850585938, 1.2988510131835938, 22.23760986328125, 61.29264831542969, -20.55976104736328, 32.012603759765625, 0.734283447265625, 114.62847900390625, 22.803489685058594, 33.55902099609375, 240.29071044921875, 10.389923095703125, 75.29855346679688, 150.63319396972656, 119.26361083984375, 55.059326171875, 81.64108276367188, 110.39724731445312, 102.82508087158203, 81.29611206054688, 29.57470703125, 0.0, 70.86155700683594, -12.089309692382812, -7.4115447998046875, 95.49223327636719, 66.27066040039062, -26.071533203125, 8.513969421386719, 32.13884735107422, 10.160179138183594, 66.18115234375, 100.44386291503906, 64.67308044433594, 31.37603759765625, 2.5392837524414062, 16.21355438232422, -7.755645751953125, 87.09982299804688, 52.76605224609375, 16.142593383789062, 67.08973693847656, 77.4505615234375, 37.89227294921875, -72.69879150390625, 124.31524658203125, 44.73414611816406, 78.4156494140625, -7.788421630859375, -0.344390869140625, 54.41168212890625, -7.359161376953125, 85.09634399414062, -19.9066162109375, -13.406045913696289, -67.01153564453125, 94.65200805664062, 197.06927490234375, 111.294677734375, 115.5181655883789, 43.498016357421875, -54.88653564453125, 71.095703125, 78.2151107788086, 138.88827514648438, 66.33761596679688, 51.502960205078125, 122.57147216796875, 195.20980834960938, -19.43450927734375, 125.59523010253906, -48.537750244140625, 80.6485595703125, -60.78851318359375, 77.80119323730469, 86.94903564453125, -16.743423461914062, 5.4319000244140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000160.npy"}
{"epoch": 0.33507853403141363, "step": 161, "batch_size": 128, "mean": 44.899330139160156, "std": 79.99961853027344, "min": -144.98097229003906, "p10": -41.220341491699216, "median": 37.642738342285156, "p90": 127.74954986572266, "max": 429.2919921875, "pos_frac": 0.734375, "sample": [-4.7895660400390625, 31.766822814941406, -19.61785888671875, -103.54769897460938, -5.313690185546875, -102.42073059082031, 28.658283233642578, 106.16970825195312, 137.54202270507812, -12.2073974609375, 81.66710662841797, -8.65753173828125, 0.0, 118.47544860839844, -8.121368408203125, 96.78961181640625, -17.837738037109375, 80.8800048828125, 53.96051025390625, 157.99728393554688, 46.15093994140625, 32.36872100830078, 59.04278564453125, -11.136383056640625, -95.15328216552734, 2.38531494140625, 429.2919921875, 142.68569946289062, -18.946693420410156, 38.29296875, 71.13949584960938, -124.4344482421875, 16.7830753326416, 18.100799560546875, 8.362136840820312, 15.347503662109375, 43.95991516113281, 62.951934814453125, 7.621227264404297, 53.12076187133789, 35.1143798828125, -4.14581298828125, 207.1474609375, 102.6202392578125, 27.15032958984375, 7.066423416137695, 22.61627197265625, 72.7789306640625, -119.95822143554688, 46.28228759765625, 223.85772705078125, 42.20713806152344, 23.513717651367188, 143.3797607421875, 53.63262939453125, -33.909393310546875, 67.05424499511719, 143.05105590820312, 66.85157775878906, 113.66525268554688, 124.73468017578125, 145.5045623779297, 36.99250793457031, 0.0, -57.7391357421875, 27.447296142578125, 7.237955093383789, 117.54911804199219, -16.80072021484375, -144.98097229003906, -32.50286865234375, -20.98828887939453, -54.01655578613281, 103.65814208984375, 28.722366333007812, 149.64825439453125, 127.17790222167969, 113.13239288330078, 48.36248779296875, 62.72596740722656, 81.91033935546875, 271.9092102050781, -44.533111572265625, 118.68649291992188, 103.44892120361328, 97.17581176757812, -39.80058288574219, 24.005401611328125, 94.14059448242188, -9.049797058105469, 113.0672607421875, 109.46540832519531, 29.695526123046875, 111.85940551757812, 127.46452331542969, -91.5757827758789, 113.80416870117188, -103.76165771484375, 4.862152099609375, 2.5366897583007812, 7.690093994140625, 53.822418212890625, -60.96087646484375, 11.266357421875, 91.19134521484375, 71.71355438232422, 38.42645263671875, 61.07733917236328, 26.83648681640625, 128.41461181640625, -37.4625244140625, 28.293670654296875, 108.0552978515625, -87.36503601074219, 12.527587890625, 109.69902801513672, 55.38780975341797, 106.35565185546875, 58.660858154296875, 130.90924072265625, -30.635406494140625, 125.372314453125, 19.879119873046875, 23.5064697265625, 32.457611083984375, 83.96896362304688, -26.45123291015625, 73.99761962890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000161.npy"}
{"epoch": 0.33717277486910996, "step": 162, "batch_size": 128, "mean": 41.499610900878906, "std": 71.57195281982422, "min": -157.230712890625, "p10": -35.85155715942383, "median": 33.937835693359375, "p90": 129.99322814941405, "max": 306.3282775878906, "pos_frac": 0.71875, "sample": [-23.388839721679688, 12.685688018798828, 162.29660034179688, 74.75435638427734, 101.4080810546875, -41.105567932128906, 179.58685302734375, 45.25531005859375, 11.77020263671875, 33.8953857421875, -11.46160888671875, -4.768749237060547, 106.47659301757812, 139.13052368164062, 83.4343032836914, 57.0118408203125, 118.55032348632812, 180.97840881347656, 155.62440490722656, 105.14478302001953, 77.92890930175781, 56.830413818359375, 32.855712890625, 88.63575744628906, -14.371095657348633, 92.529541015625, 99.94113159179688, 67.58793640136719, -13.355209350585938, 64.94500732421875, 29.093826293945312, 134.31069946289062, -15.702407836914062, 33.98028564453125, -67.62120056152344, 30.29974365234375, 45.5252685546875, -144.97320556640625, -1.1914100646972656, 158.8231201171875, -81.09823608398438, 96.181884765625, 21.28295135498047, 96.20457458496094, 53.90509033203125, 18.39544677734375, 108.1195068359375, 1.3979911804199219, -26.90545654296875, 82.882080078125, 3.8426246643066406, 29.897727966308594, 52.575721740722656, -39.9678955078125, 98.83404541015625, -2.8623504638671875, 99.99771118164062, 112.40036010742188, 62.75581741333008, 10.21405029296875, 15.467098236083984, 8.44027328491211, 48.9801025390625, 33.340087890625, 10.893043518066406, 44.63819122314453, 67.97047424316406, 136.22027587890625, 7.7226104736328125, 128.14288330078125, -11.102252960205078, 41.71697998046875, 65.8826904296875, -37.52983856201172, 113.6319580078125, 21.17083740234375, 10.372344970703125, 69.68118286132812, 82.84783935546875, 50.629852294921875, -157.230712890625, 38.767608642578125, 118.19326782226562, -26.330875396728516, 13.74127197265625, 144.636962890625, -10.778289794921875, -45.33943176269531, 59.38031005859375, 89.6104736328125, -18.481887817382812, 87.97601318359375, 74.91705322265625, -37.6456298828125, 32.55384063720703, 12.2857666015625, -0.5054359436035156, 61.19990539550781, -13.8270263671875, -34.74372863769531, 184.8616943359375, -155.03408813476562, 191.93194580078125, 40.779754638671875, -58.030670166015625, 35.540802001953125, -14.063468933105469, 90.60174560546875, 306.3282775878906, 147.32388305664062, -17.358642578125, -24.167434692382812, -35.132293701171875, 10.599029541015625, 14.59564208984375, 111.13487243652344, -12.671218872070312, 92.81517028808594, -51.64434814453125, -6.147064208984375, 54.01620864868164, -114.08758544921875, 21.72479248046875, 14.082656860351562, -31.68108367919922, 117.75631713867188, 25.58648681640625, 33.38755798339844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000162.npy"}
{"epoch": 0.3392670157068063, "step": 163, "batch_size": 128, "mean": 47.45769119262695, "std": 67.6092758178711, "min": -135.240234375, "p10": -32.385169982910156, "median": 44.139060974121094, "p90": 128.1037933349609, "max": 241.20318603515625, "pos_frac": 0.765625, "sample": [38.7664794921875, 5.082073211669922, 95.4523696899414, -25.266448974609375, -34.75634765625, 73.21023559570312, 81.4827880859375, 19.31610107421875, 87.77935791015625, 35.596710205078125, -6.077056884765625, 39.802146911621094, 91.46094512939453, 40.945159912109375, 94.56929016113281, 2.0035400390625, -15.226028442382812, -42.5037841796875, 65.78116607666016, 100.00654602050781, -97.46796417236328, 62.17240524291992, 144.745361328125, 19.7073974609375, -0.533599853515625, 135.05630493164062, 42.1695556640625, 44.983158111572266, 93.68557739257812, 61.89726257324219, 88.17848205566406, 10.85772705078125, 99.65933227539062, -15.428047180175781, 43.123321533203125, 37.16973876953125, 42.019439697265625, 63.6292724609375, -90.10687255859375, -32.70758056640625, -30.6943359375, 106.8409423828125, 69.13824462890625, -16.775672912597656, 44.176300048828125, 165.85484313964844, 53.20948791503906, 206.5223846435547, 53.0772705078125, 104.67704772949219, -95.4530029296875, 117.45614624023438, 93.25247192382812, 125.1241455078125, -56.84721374511719, 81.187255859375, 60.99024963378906, 106.07585144042969, 92.87648010253906, 111.22138977050781, 60.292266845703125, 136.59515380859375, 124.46560668945312, 44.10182189941406, 54.630828857421875, -5.4462890625, 6.985992431640625, 117.33685302734375, -3.288360595703125, 52.17512512207031, 9.804244995117188, 69.1397705078125, 8.22357177734375, 65.42379760742188, -10.534721374511719, -12.696945190429688, 5.173004150390625, 52.54425048828125, 121.77931213378906, 111.52963256835938, 83.75511169433594, 138.93191528320312, 1.1564483642578125, -60.0233154296875, -42.51545715332031, 31.433929443359375, 92.91635131835938, 2.89910888671875, 241.20318603515625, 123.34063720703125, 7.200403213500977, 21.629241943359375, -7.421062469482422, -32.24699401855469, -135.240234375, 71.64125061035156, 181.98316955566406, 22.283721923828125, 145.41397094726562, -45.42192077636719, -12.7620849609375, 163.7269744873047, -122.13494873046875, 113.86529541015625, 112.5750732421875, 2.406158447265625, -10.982223510742188, 178.29959106445312, 20.0491943359375, 8.781814575195312, 38.78179931640625, 1.4160079956054688, 103.7421875, -29.158248901367188, 140.435302734375, 120.28485107421875, 2.5166473388671875, -37.90721130371094, 45.7598876953125, 76.67254638671875, 92.21717834472656, 111.05398559570312, 35.88226318359375, 1.60699462890625, 145.13787841796875, 24.625198364257812, 32.25209045410156, -27.859657287597656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000163.npy"}
{"epoch": 0.3413612565445026, "step": 164, "batch_size": 128, "mean": 43.03172302246094, "std": 76.10730743408203, "min": -159.06103515625, "p10": -49.90221252441406, "median": 38.98447608947754, "p90": 135.40966491699217, "max": 300.484619140625, "pos_frac": 0.6953125, "sample": [10.643341064453125, 49.085960388183594, 12.726238250732422, 163.26837158203125, 56.019134521484375, 155.58506774902344, -64.92427062988281, 131.54278564453125, 10.364913940429688, 180.36386108398438, 8.177602767944336, 121.0977783203125, 33.353851318359375, 16.08648681640625, -27.03301239013672, -1.8865280151367188, -49.619964599609375, 95.17716979980469, 2.92279052734375, -57.83943176269531, 151.82415771484375, 75.9874267578125, -33.03627014160156, 12.764488220214844, 43.32354736328125, 93.41796875, -63.800933837890625, 57.22126007080078, -11.067707061767578, 55.61981201171875, -33.57817077636719, -72.29412841796875, 5.361785888671875, -2.8330841064453125, 36.59273147583008, -50.560791015625, -32.25489807128906, 17.687973022460938, 1.2176742553710938, -105.48733520507812, 44.15972900390625, -72.35595703125, 126.99870300292969, -12.889892578125, -24.336212158203125, -4.371494293212891, -121.56854248046875, 170.50997924804688, 74.71710205078125, 18.66424560546875, 140.6644287109375, 133.05706787109375, -9.872528076171875, 60.001434326171875, -15.105133056640625, 2.6451568603515625, 22.57366943359375, 54.89115905761719, 191.19873046875, 101.54153442382812, 23.04058074951172, -6.65264892578125, 123.79107666015625, 13.967437744140625, 88.18423461914062, 98.21124267578125, 124.09848022460938, -8.510330200195312, -56.8992919921875, 104.00559997558594, 101.0372314453125, 41.376220703125, 10.694717407226562, -12.724838256835938, 118.661865234375, -20.431243896484375, 58.73283386230469, 133.15762329101562, 60.00041198730469, 46.94482421875, 131.49105834960938, -12.50994873046875, -79.17569732666016, 15.23779296875, 0.40921783447265625, -87.46755981445312, 148.45962524414062, -45.476104736328125, 94.50503540039062, 89.20526123046875, 1.0851325988769531, 129.76083374023438, 101.84829711914062, 67.46612548828125, 142.75848388671875, 126.36459350585938, -14.21551513671875, 66.1951904296875, 86.34909057617188, -159.06103515625, 300.484619140625, 195.8133087158203, 62.551055908203125, 43.085540771484375, -6.411163330078125, 100.89732360839844, 42.58673095703125, 103.88674926757812, -68.56741333007812, 90.25553894042969, 119.42898559570312, 32.60919189453125, 42.8055419921875, 2.333608627319336, -31.775970458984375, -22.85393524169922, 107.5738525390625, 68.90289306640625, -14.255767822265625, 58.344940185546875, -6.858482360839844, 116.99459838867188, -15.5343017578125, 24.229095458984375, 50.71954345703125, 200.66819763183594, 187.8026123046875, 6.087139129638672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000164.npy"}
{"epoch": 0.34345549738219894, "step": 165, "batch_size": 128, "mean": 30.293659210205078, "std": 62.89502716064453, "min": -95.41741943359375, "p10": -47.496664428710936, "median": 20.99921417236328, "p90": 119.63425140380859, "max": 191.34779357910156, "pos_frac": 0.6640625, "sample": [22.235015869140625, 96.73638916015625, 5.004833221435547, 60.8135986328125, -95.41741943359375, 17.660385131835938, 6.204368591308594, 20.198890686035156, -95.012451171875, 72.89797973632812, -10.82733154296875, 174.9866943359375, 34.6244010925293, 29.43896484375, 135.75143432617188, 27.54302978515625, 30.3846435546875, -42.772552490234375, 54.669158935546875, 20.790069580078125, 14.244903564453125, 1.530303955078125, 136.5557861328125, 23.37883758544922, -14.761749267578125, 88.81037902832031, 105.25875854492188, -17.9080810546875, 163.31101989746094, 40.17462158203125, 125.95455932617188, 108.23013305664062, -47.04083251953125, -25.737518310546875, 18.39581298828125, 47.200347900390625, 28.17334747314453, -6.6614990234375, -12.586305618286133, -73.90306091308594, 41.56964111328125, -41.473175048828125, 191.34779357910156, 100.9347915649414, 18.59809112548828, -59.437255859375, 66.197998046875, -4.39874267578125, -10.385772705078125, -60.7188720703125, 21.03466796875, -52.06587219238281, 111.52691650390625, -26.470848083496094, 116.18224334716797, 112.34456634521484, 12.77286148071289, 52.49591064453125, 6.033073425292969, 96.5880126953125, 37.799346923828125, -20.8897705078125, 22.368072509765625, 90.8167495727539, 151.31182861328125, 72.93197631835938, 76.148193359375, 14.356681823730469, 35.53082275390625, 38.37825012207031, 167.94073486328125, 8.759521484375, 49.60675048828125, -69.33160400390625, -17.5123291015625, -27.32440185546875, -72.55123901367188, 16.04554557800293, -15.331047058105469, 7.11041259765625, 10.775390625, 48.747257232666016, 104.49456787109375, -3.05029296875, 156.92242431640625, -35.150604248046875, 23.098915100097656, 72.45330810546875, 49.84661865234375, 119.1251220703125, 11.606414794921875, 3.2682952880859375, -0.4359588623046875, 110.06610107421875, -89.99221801757812, -69.32711791992188, -75.44305419921875, 128.07598876953125, 85.30010986328125, -12.889190673828125, 58.434661865234375, 133.64599609375, -1.1191673278808594, -15.766876220703125, 38.404449462890625, 69.14089965820312, -15.3038330078125, -29.68328857421875, -5.091339111328125, 52.82415771484375, 44.604652404785156, 82.62417602539062, 120.82221984863281, -7.802490234375, 49.392425537109375, 36.02227783203125, 74.87014770507812, -57.49102783203125, 2.873577117919922, -4.185489654541016, -28.101715087890625, 44.06682205200195, 149.41397094726562, 20.963760375976562, -30.0391845703125, -32.23188781738281, -48.560272216796875, 9.997970581054688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000165.npy"}
{"epoch": 0.34554973821989526, "step": 166, "batch_size": 128, "mean": 37.84318542480469, "std": 72.3558349609375, "min": -107.37374877929688, "p10": -50.68227844238281, "median": 22.06146240234375, "p90": 132.6973663330078, "max": 266.1121826171875, "pos_frac": 0.671875, "sample": [42.704345703125, 5.7494659423828125, 3.3343963623046875, 55.579193115234375, 146.54933166503906, 180.017822265625, 141.03347778320312, 115.98681640625, 55.43791198730469, 131.88003540039062, 43.35151672363281, -15.620079040527344, -48.86151123046875, 61.61383056640625, 42.10974884033203, 169.8458251953125, 99.78633117675781, -80.20306396484375, 106.49188232421875, 40.4998779296875, 30.642913818359375, 128.8422088623047, 89.30618286132812, 42.413665771484375, -73.727783203125, 58.449066162109375, 20.224105834960938, -13.67547607421875, 23.2335205078125, 71.10101318359375, 137.66387939453125, -77.84584045410156, -2.8616943359375, 66.87789154052734, 229.91574096679688, 100.7027587890625, 20.02039337158203, -19.545257568359375, -77.22217559814453, -87.37788391113281, -8.902687072753906, -74.89889526367188, 57.49591064453125, 78.3748779296875, -3.916168212890625, 2.441650390625, 188.86532592773438, -107.37374877929688, 56.35113525390625, -34.30381774902344, 59.92503356933594, 70.97039794921875, 118.83291625976562, -4.392429351806641, -54.45375061035156, 44.94764709472656, 12.156951904296875, 111.32125854492188, 51.10504150390625, 0.04801177978515625, -25.34271240234375, -10.8983154296875, 65.63333129882812, 53.495849609375, -4.3421478271484375, 15.87685775756836, 43.895790100097656, -2.1356964111328125, 8.307411193847656, -9.653549194335938, -4.5728607177734375, -5.105705261230469, -55.17498779296875, 31.4267578125, 3.9692134857177734, 66.7908935546875, -63.44769287109375, 118.50088500976562, 49.25177001953125, 98.28224182128906, 172.33151245117188, -12.888511657714844, 20.36371612548828, 52.64930725097656, -0.12602615356445312, 60.268402099609375, 78.46575927734375, 129.20652770996094, 0.12677001953125, -49.06593322753906, -17.9229736328125, 5.4375152587890625, 189.68850708007812, 7.356101989746094, 266.1121826171875, 4.893306732177734, 150.85092163085938, -93.72113037109375, 7.8186492919921875, -21.732025146484375, -32.27626037597656, 133.42755126953125, 15.23590087890625, 69.75050354003906, -99.44660949707031, -45.700836181640625, -1.1939697265625, 85.57366943359375, -21.227218627929688, -27.165328979492188, 40.0067138671875, 20.889404296875, 103.38245391845703, 11.527029037475586, 91.314453125, 2.260101318359375, -5.151058197021484, 132.38442993164062, -9.2476806640625, -87.51800537109375, -0.6602287292480469, 139.01998901367188, 121.50831604003906, 95.51560974121094, 16.371551513671875, 122.02587890625, 118.70451354980469, 0.728057861328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000166.npy"}
{"epoch": 0.34764397905759165, "step": 167, "batch_size": 128, "mean": 43.865577697753906, "std": 75.7043228149414, "min": -157.5503692626953, "p10": -44.36219024658202, "median": 38.048606872558594, "p90": 130.3259735107422, "max": 291.621826171875, "pos_frac": 0.7578125, "sample": [94.57971954345703, 6.611274719238281, -13.709869384765625, 62.56927490234375, 71.9095230102539, 130.23858642578125, 32.078125, 14.797439575195312, 21.501190185546875, 135.0118408203125, 33.7354736328125, 35.60337829589844, 87.61886596679688, 69.067138671875, -26.957382202148438, 93.32177734375, 99.88916015625, 27.95654296875, 130.52987670898438, -35.290771484375, -29.2862548828125, -59.68714904785156, -3.9494705200195312, 0.374359130859375, 13.706382751464844, 123.57388305664062, 85.47958374023438, -40.8818359375, -70.42634582519531, 10.325546264648438, 30.552505493164062, 29.418792724609375, 95.54297637939453, 103.4940185546875, 87.30072021484375, 261.7576904296875, 65.83570861816406, 22.827316284179688, 60.907318115234375, -11.4771728515625, 88.49334716796875, 77.67184448242188, -124.68318939208984, -123.11177062988281, -105.26460266113281, 129.95077514648438, 117.30245971679688, 3.035919189453125, 115.94662475585938, 11.268009185791016, 3.227783203125, -0.30013275146484375, 19.442047119140625, 9.354095458984375, -117.1982421875, 149.91546630859375, 120.11492919921875, -38.74916076660156, 45.803924560546875, 3.4150352478027344, 21.8563232421875, 30.96636962890625, 58.2607421875, 131.80282592773438, 101.18765258789062, 150.74525451660156, 100.77749633789062, -140.08352661132812, 156.7659912109375, 10.715850830078125, -14.1441650390625, 35.702484130859375, -0.13271331787109375, -10.923240661621094, -52.48301696777344, 62.87713623046875, 122.14830780029297, -10.822479248046875, 64.1737060546875, -10.223312377929688, 67.1550521850586, -31.278411865234375, 104.44023895263672, 39.75489044189453, -61.12416076660156, -18.124679565429688, 166.48687744140625, 25.309112548828125, 67.2996826171875, 99.74372863769531, 68.7216796875, 36.342323303222656, 104.64453125, 63.7384033203125, 8.541969299316406, 23.749717712402344, 119.06327819824219, 2.018949508666992, 83.48768615722656, 84.82534790039062, -7.9411773681640625, -157.5503692626953, 0.0, 87.9974365234375, 93.67794799804688, 120.09103393554688, 133.43026733398438, 145.3386993408203, -86.08578491210938, 3.11279296875, 123.50753784179688, 2.433055877685547, 52.179351806640625, 1.673980712890625, 163.93218994140625, 52.279144287109375, 84.22219848632812, -99.36650085449219, 30.6455078125, 118.563232421875, 78.14547729492188, 47.56085205078125, 68.17950439453125, 155.53314208984375, -97.91790771484375, 102.8604736328125, 10.621734619140625, 291.621826171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000167.npy"}
{"epoch": 0.34973821989528797, "step": 168, "batch_size": 128, "mean": 48.48344802856445, "std": 74.30439758300781, "min": -211.626953125, "p10": -24.607238769531246, "median": 52.05440330505371, "p90": 145.14313049316405, "max": 216.77841186523438, "pos_frac": 0.7578125, "sample": [78.92340087890625, 23.2103271484375, 148.58648681640625, 142.56658935546875, 99.2904052734375, 22.59588623046875, 96.40956115722656, 81.52828979492188, 56.81158447265625, 17.7852783203125, -44.58270263671875, 110.4713134765625, 155.34039306640625, -73.02389526367188, 185.486572265625, -112.885009765625, 76.068115234375, 0.4969921112060547, 216.77841186523438, 55.81781005859375, -22.122634887695312, 47.611419677734375, 67.5047607421875, -123.60809326171875, -10.853302001953125, 35.08625793457031, 116.1046142578125, 51.64495849609375, 30.580810546875, -70.47607421875, 155.19114685058594, 78.71292114257812, 176.49053955078125, 66.57994842529297, -211.626953125, 46.480384826660156, 52.01776123046875, -27.30487060546875, 61.226318359375, 14.37811279296875, 58.69293975830078, 71.671875, 33.17449951171875, 57.38037109375, 4.943443298339844, 146.02685546875, -88.0316162109375, 78.83143615722656, 186.93072509765625, 80.09454345703125, 119.20819091796875, -8.562397003173828, 79.31517028808594, 29.721054077148438, 97.6051025390625, 145.13299560546875, 60.599945068359375, 193.734130859375, 91.95138549804688, 72.71249389648438, 32.634307861328125, 140.32037353515625, 87.7880859375, 115.39251708984375, 65.04464721679688, 31.182235717773438, 173.38421630859375, 2.54425048828125, 103.763427734375, 29.80120849609375, -0.952606201171875, 107.45611572265625, 135.46282958984375, -7.149639129638672, 91.60336303710938, 57.42424774169922, 75.96762084960938, 82.65408325195312, 50.75658416748047, 83.47439575195312, -62.36273193359375, 95.53990173339844, 128.56362915039062, -3.645101547241211, 114.63135528564453, 37.2999267578125, -2.6508636474609375, -13.136947631835938, 145.16677856445312, 158.0099334716797, -133.11932373046875, 0.47934722900390625, -11.15325927734375, 82.18353271484375, -3.24591064453125, -30.866943359375, 0.034149169921875, 52.09104537963867, 3.1169509887695312, 4.9119415283203125, -17.087722778320312, 197.84014892578125, 65.01092529296875, 99.42047119140625, -20.339630126953125, 14.280502319335938, 0.0, -2.8547897338867188, -129.29998779296875, -1.341400146484375, 6.633544921875, 98.55433654785156, 108.68365478515625, 85.78437805175781, 18.949485778808594, 8.86859130859375, 34.46846389770508, 2.439666748046875, -10.651458740234375, 34.979461669921875, 127.19302368164062, -6.085418701171875, 21.979690551757812, 71.997314453125, 12.967132568359375, -23.45111083984375, 108.15829467773438, -104.04318237304688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000168.npy"}
{"epoch": 0.3518324607329843, "step": 169, "batch_size": 128, "mean": 56.334102630615234, "std": 75.15252685546875, "min": -143.4651641845703, "p10": -25.68096008300781, "median": 49.15863800048828, "p90": 156.69805297851562, "max": 271.3860168457031, "pos_frac": 0.7421875, "sample": [-27.433822631835938, 152.45281982421875, 144.5294189453125, 96.74029541015625, 133.27346801757812, -10.226287841796875, 0.8109130859375, 33.10188293457031, 84.76202392578125, -29.04803466796875, 5.48246955871582, 123.09359741210938, 67.80577087402344, 20.856414794921875, 28.92730712890625, 37.49560546875, 36.8724365234375, 188.847412109375, 111.61343383789062, 212.437744140625, 50.828948974609375, 62.644012451171875, 128.0189971923828, 19.319053649902344, 27.657196044921875, -17.513168334960938, 26.536998748779297, 119.12301635742188, 161.07977294921875, 72.42398071289062, 50.87664794921875, -0.014690399169921875, -10.454132080078125, 9.381362915039062, -11.353290557861328, 18.105880737304688, -23.92177391052246, -143.4651641845703, 59.47428894042969, -5.50128173828125, -24.929733276367188, -41.854248046875, 65.86835479736328, 156.98208618164062, 190.98565673828125, 106.52139282226562, 64.02003479003906, 68.60575866699219, 18.4334716796875, 100.70391845703125, -15.620750427246094, 156.57632446289062, 0.7398891448974609, 111.74556732177734, 271.3860168457031, 42.019073486328125, 2.5147476196289062, 61.41943359375, 118.32962036132812, -42.59197998046875, 25.0672607421875, 13.471702575683594, 213.55859375, 96.52552795410156, -13.826385498046875, -34.334999084472656, 60.01470947265625, 152.63589477539062, 22.5152587890625, 23.27197265625, 44.97417449951172, 14.802543640136719, 20.3936767578125, 144.44454956054688, 54.0399169921875, -52.40911865234375, -46.77009582519531, 62.69420623779297, -2.6439571380615234, -0.2527313232421875, 34.07305908203125, 50.705780029296875, 79.8216552734375, -12.1741943359375, -119.89828491210938, -72.2863540649414, 113.54727172851562, -5.3641204833984375, 85.9530029296875, 70.00616455078125, 2.9170989990234375, 192.6038818359375, 47.61149597167969, -0.015359878540039062, 94.916259765625, 102.38725280761719, 122.28646087646484, 134.03826904296875, -108.2940673828125, -0.2601776123046875, 0.0, -46.94158935546875, -19.728879928588867, 76.38616943359375, -9.291778564453125, 105.56719970703125, 132.4290771484375, 161.1014404296875, 117.8375244140625, 94.22306823730469, 19.8021240234375, 6.563591003417969, 28.26471710205078, 54.956207275390625, 71.22509765625, 40.1162109375, 217.87664794921875, 68.65971374511719, 122.34133911132812, -1.3774700164794922, 111.10466766357422, 144.07684326171875, 101.1324462890625, 37.08568572998047, 210.32528686523438, 208.56674194335938, 159.6141357421875, -29.39514923095703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000169.npy"}
{"epoch": 0.3539267015706806, "step": 170, "batch_size": 128, "mean": 46.92905807495117, "std": 88.7153091430664, "min": -260.1707763671875, "p10": -55.5950927734375, "median": 41.46025085449219, "p90": 152.90999450683591, "max": 417.7479248046875, "pos_frac": 0.6953125, "sample": [-10.387298583984375, 79.90316772460938, 218.22093200683594, -69.23492431640625, 57.47383117675781, 4.7098846435546875, 7.4682464599609375, 13.868621826171875, -55.007415771484375, 56.816314697265625, 27.586814880371094, -1.9361610412597656, -34.2911376953125, 69.46589660644531, 200.4167938232422, 104.75682830810547, 108.6778564453125, 50.58734893798828, -7.063568115234375, 40.504638671875, 93.21115112304688, 35.749755859375, 37.19805908203125, 417.7479248046875, -72.400390625, 74.61114501953125, 85.584228515625, 104.61697387695312, 137.78302001953125, -56.966339111328125, -53.3201904296875, -17.523818969726562, 66.59416198730469, 59.37152099609375, -23.874160766601562, 195.486572265625, -10.442718505859375, -20.313461303710938, 78.20460510253906, 98.72808837890625, -21.099273681640625, 4.5631103515625, 42.46693420410156, 96.44223022460938, 192.29150390625, -113.6316909790039, 55.906646728515625, 33.41668701171875, 139.3056640625, -2.156951904296875, -61.818359375, 228.844482421875, -5.8711090087890625, -21.995529174804688, 79.07899475097656, 8.39923095703125, 123.5177001953125, 115.21533203125, -78.42697143554688, 48.172607421875, 32.4737548828125, 98.16021728515625, 8.690277099609375, -2.1345367431640625, -38.542701721191406, 41.44189453125, 94.1700439453125, 158.86627197265625, 8.7373046875, -35.447296142578125, -20.890838623046875, 58.359130859375, 13.070243835449219, 1.787942886352539, -87.34085845947266, -3.4324512481689453, 241.1776123046875, -12.687957763671875, 224.16836547851562, 101.60621643066406, 52.025428771972656, 41.478607177734375, 90.81620788574219, 70.48043823242188, -130.21954345703125, 20.535919189453125, 36.522796630859375, 21.85076141357422, 95.52386474609375, -97.91970825195312, 111.65194702148438, 45.522430419921875, -83.36139678955078, 47.400543212890625, 177.90121459960938, 155.8079833984375, 100.32606506347656, 62.99320983886719, -260.1707763671875, 244.68768310546875, 46.101287841796875, 132.20211791992188, 108.38198852539062, -47.828216552734375, 120.6790771484375, -14.225555419921875, 137.24713134765625, 35.77714538574219, 64.7491455078125, 151.66799926757812, 103.75968933105469, -73.96377563476562, 0.0, 146.17105102539062, 43.61619567871094, 105.90432739257812, 4.1117706298828125, 75.26278686523438, 27.945266723632812, -14.305755615234375, 41.299560546875, 71.98641967773438, -23.028579711914062, -92.23787689208984, 2.9288558959960938, 36.613548278808594, -0.86181640625, 175.67581176757812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000170.npy"}
{"epoch": 0.35602094240837695, "step": 171, "batch_size": 128, "mean": 55.4050178527832, "std": 76.00269317626953, "min": -121.56112670898438, "p10": -42.63508911132812, "median": 52.071815490722656, "p90": 158.1197540283203, "max": 278.28826904296875, "pos_frac": 0.7578125, "sample": [169.487548828125, 53.67724609375, 24.2696533203125, 30.23193359375, -12.12890625, 80.7553939819336, 92.47125244140625, 96.30831909179688, -34.87449645996094, 91.21528625488281, 47.349212646484375, 246.6124267578125, -55.848907470703125, 145.55191040039062, 173.197021484375, 98.64385986328125, 56.43500900268555, 161.38040161132812, 104.15875244140625, 104.82305908203125, 85.41232299804688, -5.846843719482422, 217.97174072265625, -19.525970458984375, -1.1007423400878906, 3.2005844116210938, -63.806671142578125, 52.585662841796875, 118.20518493652344, 128.3754425048828, 35.99794006347656, 85.46713256835938, 181.3287353515625, 65.29280090332031, 163.50222778320312, -106.52578735351562, 46.47413635253906, 77.09820556640625, 22.282794952392578, -42.1273193359375, 112.70106506347656, -45.81748580932617, 37.81182861328125, 157.13504028320312, 37.058685302734375, 183.12503051757812, 68.33767700195312, 19.815139770507812, 160.41741943359375, 101.25468444824219, 23.6358642578125, 140.5833740234375, 9.021636962890625, -24.00823974609375, -11.79193115234375, 28.62353515625, 35.60797119140625, -121.56112670898438, 80.89434814453125, 65.0982666015625, 47.89776611328125, 52.22291564941406, 135.10459899902344, 19.799148559570312, -90.50796508789062, 22.569488525390625, -17.597061157226562, 14.895561218261719, 57.232574462890625, -72.21383666992188, -66.90481567382812, 18.2850341796875, 104.64404296875, 45.456268310546875, -36.055877685546875, 72.07695007324219, 77.649658203125, 156.36642456054688, 278.28826904296875, -77.4359130859375, 53.21630859375, 18.64495849609375, -17.403724670410156, 3.3660430908203125, 77.57209777832031, -32.63482666015625, 93.42040252685547, 125.6744384765625, -11.49810791015625, 134.07763671875, 83.41542053222656, 172.61123657226562, 119.67851257324219, 117.53841400146484, 1.5502777099609375, 57.60645294189453, 145.17721557617188, -34.81007385253906, 36.316497802734375, 57.45403289794922, 34.107330322265625, 93.4981689453125, 143.965087890625, -23.430850982666016, -43.81988525390625, 22.799560546875, -69.89838409423828, 122.384033203125, 116.39663696289062, 42.348236083984375, 64.43612670898438, 128.62078857421875, 147.78811645507812, 19.621917724609375, 27.52325439453125, 51.92071533203125, 47.982269287109375, -16.761585235595703, 48.59820556640625, 34.53582763671875, -83.42733764648438, -0.9308929443359375, 101.7686767578125, 176.4606475830078, -44.111244201660156, 162.9024658203125, 89.15980529785156, -21.261688232421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000171.npy"}
{"epoch": 0.3581151832460733, "step": 172, "batch_size": 128, "mean": 32.18754959106445, "std": 73.4654312133789, "min": -157.8133544921875, "p10": -64.63937072753906, "median": 27.820755004882812, "p90": 123.92278442382812, "max": 199.40634155273438, "pos_frac": 0.6953125, "sample": [36.9788818359375, -66.24118041992188, -44.387542724609375, 8.047332763671875, -121.01434326171875, -48.658447265625, 71.70233154296875, 84.18966674804688, 51.977447509765625, 147.94696044921875, 158.58709716796875, 17.278610229492188, 105.22100830078125, 43.839752197265625, -2.615203857421875, -39.5313720703125, 199.40634155273438, 101.58905029296875, 26.6527099609375, 96.25848388671875, -21.870819091796875, 21.79217529296875, -75.57735443115234, 26.334136962890625, 38.070709228515625, 48.819175720214844, 15.792766571044922, -130.2734375, 87.74921417236328, 78.43801879882812, 91.143310546875, 116.13108825683594, 113.37916564941406, -30.658905029296875, -1.414764404296875, -34.94938659667969, 23.066650390625, 77.74383544921875, -76.90567779541016, -18.627471923828125, -3.91790771484375, -43.910186767578125, 57.68585205078125, 170.34442138671875, 15.908203125, -24.222305297851562, -34.310882568359375, 22.33013916015625, 87.277099609375, 76.01091003417969, 159.01611328125, -78.41842651367188, -16.0338134765625, 125.394775390625, 28.66448974609375, 18.231884002685547, -2.4538116455078125, 46.73150634765625, 11.408462524414062, 123.29193115234375, -17.737457275390625, 35.64324951171875, 75.97725677490234, 116.18728637695312, 47.96783447265625, 10.767578125, 0.9191131591796875, 29.09899139404297, 17.48725700378418, 83.68853759765625, -87.77984619140625, 1.3237800598144531, -9.560394287109375, 57.524627685546875, 82.02969360351562, 3.9447021484375, 114.89556884765625, 99.07896423339844, -140.63177490234375, 16.521888732910156, -44.59861755371094, 62.55401611328125, 151.40167236328125, 156.35629272460938, 126.62336730957031, 38.821807861328125, -3.6674041748046875, 97.47552490234375, 26.977020263671875, -58.855010986328125, 115.72836303710938, 5.179569244384766, -148.15872192382812, 90.07789611816406, 14.429931640625, 20.82440185546875, -109.93621826171875, 4.930173873901367, 196.70997619628906, 18.882484436035156, 119.4498062133789, -157.8133544921875, 28.692733764648438, 95.74229431152344, -29.450973510742188, 83.2691650390625, 18.03741455078125, 136.09613037109375, -11.536285400390625, 46.33447265625, 65.26602172851562, -2.8527679443359375, 69.15170288085938, -13.969589233398438, 61.7069091796875, -88.6380615234375, 84.1773681640625, 49.920166015625, 48.8321533203125, 68.07638549804688, 130.27801513671875, -63.952880859375, -117.86125183105469, 151.01185607910156, 0.0, 69.3360595703125, 55.681915283203125, 11.47918701171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000172.npy"}
{"epoch": 0.36020942408376966, "step": 173, "batch_size": 128, "mean": 42.587059020996094, "std": 79.54762268066406, "min": -144.80148315429688, "p10": -52.11682281494141, "median": 30.896503448486328, "p90": 147.36446990966797, "max": 312.91656494140625, "pos_frac": 0.65625, "sample": [-59.663909912109375, -144.80148315429688, -110.93984985351562, 75.06674194335938, 5.9284515380859375, 4.8772430419921875, -3.96551513671875, 56.20735168457031, -3.40118408203125, 29.93054962158203, 121.70901489257812, 174.39138793945312, 24.097190856933594, 23.77288818359375, -32.33216857910156, -1.9831008911132812, 111.79388427734375, 136.02883911132812, -80.65306091308594, 26.5721435546875, 149.25436401367188, 45.1580810546875, 167.20697021484375, 108.69639587402344, 121.3084716796875, -25.780670166015625, 100.6239013671875, 23.0921630859375, -69.60992431640625, -35.545562744140625, -8.72372817993164, -19.048614501953125, 43.9637451171875, 7.1866607666015625, 72.1051025390625, -99.79293823242188, -56.024078369140625, 54.51702880859375, 45.62748718261719, 99.9898681640625, 92.09893798828125, -3.516815185546875, 14.578964233398438, 50.01995849609375, -91.8016357421875, 205.746826171875, 38.459259033203125, -54.994354248046875, 13.464126586914062, 136.01766967773438, 3.7645797729492188, -10.90673828125, 93.52256774902344, -51.170562744140625, -4.696746826171875, -69.85659790039062, 97.13630676269531, 14.882278442382812, 12.536575317382812, 115.02732849121094, 0.371612548828125, -72.87509155273438, 136.78366088867188, -9.89129638671875, 165.2882080078125, 312.91656494140625, 16.66217803955078, 44.44305419921875, 89.25447082519531, 75.58282470703125, 24.07781982421875, 87.21351623535156, 49.80543518066406, -1.94500732421875, -51.92219543457031, 58.055084228515625, -46.17665100097656, -52.570953369140625, 93.79850006103516, 146.67129516601562, 170.74374389648438, 108.53951263427734, 33.14305114746094, 64.53680419921875, -45.40556716918945, 127.94642639160156, -36.673065185546875, 170.2708740234375, 5.0030517578125, 44.4971923828125, 87.92120361328125, -9.86407470703125, 31.862457275390625, -2.510711669921875, 0.0, 71.22489166259766, 7.334207534790039, 58.4361572265625, 84.31967163085938, 183.84336853027344, 182.44955444335938, 61.45361328125, -21.088973999023438, -25.514846801757812, 81.345458984375, 103.63739013671875, -87.650146484375, 15.01202392578125, 105.19091033935547, 146.90994262695312, -4.2072296142578125, 148.42503356933594, 5.129051208496094, 248.21823120117188, -1.2672386169433594, 34.94671630859375, -15.047981262207031, 53.6976318359375, -15.581283569335938, -1.423492431640625, 120.23782348632812, 64.730224609375, 238.33831787109375, 136.1507568359375, -4.33453369140625, -20.674713134765625, -29.31854248046875, 33.51576232910156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000173.npy"}
{"epoch": 0.362303664921466, "step": 174, "batch_size": 128, "mean": 48.883811950683594, "std": 69.09291076660156, "min": -173.5467529296875, "p10": -23.270744323730465, "median": 49.18971252441406, "p90": 141.5275665283203, "max": 201.89202880859375, "pos_frac": 0.7890625, "sample": [154.94573974609375, 0.6262931823730469, 6.200763702392578, -4.687904357910156, 153.77365112304688, 13.6737060546875, -95.36408996582031, 16.968536376953125, 103.37521362304688, 113.84722900390625, 48.077239990234375, 47.01458740234375, 16.373046875, 166.53707885742188, 1.3125457763671875, -5.90924072265625, 50.178558349609375, -24.576995849609375, 17.243301391601562, 76.00540161132812, 1.01641845703125, 158.87432861328125, 51.2191162109375, -10.271598815917969, -106.48439025878906, 73.62319946289062, 24.415206909179688, 120.227783203125, 12.789249420166016, 110.3228988647461, 80.9715576171875, -111.4029541015625, 147.15869140625, 140.73947143554688, 106.81625366210938, -173.5467529296875, 100.09042358398438, 116.83035278320312, 146.22463989257812, 70.48574829101562, -52.05760192871094, 103.1871337890625, -12.13482666015625, 175.46115112304688, 68.78408813476562, 80.5264892578125, -34.515289306640625, 65.75704956054688, 49.64137268066406, 97.70697021484375, 13.305255889892578, 123.41849517822266, 100.04548645019531, -22.710922241210938, 33.865325927734375, 37.11140441894531, 44.56780242919922, 4.356922149658203, 55.9630126953125, 64.56455993652344, 111.8465576171875, -19.723724365234375, 37.983154296875, -123.593505859375, -11.058074951171875, -17.556472778320312, 106.56585693359375, 58.56660461425781, 63.55541229248047, 82.37271881103516, 24.408653259277344, 53.16082763671875, 143.366455078125, -45.83447265625, 92.03945922851562, 52.06550598144531, -1.3626976013183594, 1.9762802124023438, 54.065521240234375, 32.06854248046875, 126.94841766357422, 127.29498291015625, 107.4320068359375, 70.32255554199219, 32.678802490234375, -7.350982666015625, -32.538818359375, 29.31871795654297, 35.94378662109375, 83.50406646728516, -11.151832580566406, 30.83794403076172, -1.2936553955078125, 88.22125244140625, 160.1737060546875, 61.14776611328125, -5.356658935546875, 90.32551574707031, 47.9949951171875, 120.49295043945312, 1.130544662475586, 201.89202880859375, 61.93426513671875, 146.41510009765625, 100.12108612060547, 85.2496337890625, 20.900863647460938, 14.625457763671875, 17.610153198242188, -49.033851623535156, 147.88751220703125, 154.94607543945312, 64.72404479980469, -16.71478271484375, 125.97479248046875, -164.33746337890625, 10.93359375, -64.42031860351562, 48.73805236816406, 37.28363037109375, 39.04803466796875, 53.310340881347656, 105.029296875, 137.00006103515625, 117.51235961914062, 36.64283752441406, 31.86712646484375, 28.44061279296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000174.npy"}
{"epoch": 0.3643979057591623, "step": 175, "batch_size": 128, "mean": 43.8582649230957, "std": 85.67638397216797, "min": -172.43939208984375, "p10": -43.27787780761717, "median": 28.630722045898438, "p90": 150.83429870605468, "max": 442.9801025390625, "pos_frac": 0.6796875, "sample": [80.30453491210938, 156.45437622070312, 72.11666870117188, 1.1030941009521484, 28.01519775390625, 87.9569091796875, -87.26486206054688, 177.906005859375, 5.238758087158203, 122.78715515136719, 109.66554260253906, 162.6906280517578, -26.59161376953125, 155.5230712890625, -5.226409912109375, 115.98313903808594, 79.40298461914062, 29.246246337890625, 12.366729736328125, -1.1176948547363281, 15.961517333984375, 8.22116470336914, -51.836151123046875, -17.416824340820312, 147.45663452148438, 61.649658203125, 272.948486328125, 42.018096923828125, 76.61019897460938, -27.520187377929688, 160.82162475585938, -13.62811279296875, 13.327545166015625, -69.40046691894531, 37.66424560546875, 63.4008903503418, 19.51190185546875, 54.634063720703125, 86.60235595703125, 116.0174560546875, -60.32183837890625, 68.56773376464844, 158.4295654296875, 6.24395751953125, -6.493408203125, -4.2640380859375, -61.114288330078125, -115.48919677734375, 6.61004638671875, -17.535491943359375, 58.97344970703125, -7.966180801391602, 97.27284240722656, 30.070175170898438, 71.2239990234375, 54.61395263671875, -70.634765625, -18.411285400390625, -16.19256591796875, -34.15869140625, 23.288246154785156, 149.19158935546875, 68.56083679199219, -29.278045654296875, 116.26480102539062, -77.8306884765625, 37.162811279296875, 24.207595825195312, 13.06927490234375, 8.2696533203125, 24.627716064453125, 41.8638916015625, 40.292877197265625, -17.0718994140625, -20.83568572998047, -39.61004638671875, 36.25921630859375, 24.641311645507812, -21.978347778320312, 97.331787109375, 5.255767822265625, 156.38623046875, 109.98573303222656, -79.459228515625, -24.760162353515625, -76.769287109375, 97.33993530273438, -18.128021240234375, -64.4461669921875, 51.912200927734375, 82.21463012695312, 6.274318695068359, 15.072235107421875, 26.37286376953125, 105.97869873046875, -34.734588623046875, 45.09126281738281, -3.7700653076171875, 54.127952575683594, 442.9801025390625, 26.579010009765625, 148.50802612304688, 95.53125, 151.32244873046875, -6.296844482421875, -26.65399169921875, 37.24348449707031, -23.26776123046875, 8.487607955932617, 141.81036376953125, 60.7042236328125, 53.163551330566406, -5.909576416015625, 185.60357666015625, 0.0, -172.43939208984375, 20.982269287109375, 150.62509155273438, 61.960609436035156, 126.7764892578125, 89.39385986328125, 109.99468994140625, 115.60592651367188, 234.17190551757812, -152.85296630859375, 0.0, 83.63607025146484, 290.7984619140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000175.npy"}
{"epoch": 0.36649214659685864, "step": 176, "batch_size": 128, "mean": 34.88468933105469, "std": 86.09901428222656, "min": -185.71957397460938, "p10": -77.76040649414062, "median": 36.27520751953125, "p90": 143.18510131835936, "max": 362.7159118652344, "pos_frac": 0.7109375, "sample": [-80.73799133300781, 18.42546844482422, 77.76217651367188, -65.68186950683594, -10.968048095703125, 36.008209228515625, 120.28247833251953, 20.6297607421875, 124.26446533203125, 3.4012699127197266, 3.16387939453125, -38.644432067871094, 34.13482666015625, 181.1917724609375, -46.18330383300781, 115.03905487060547, 107.19522094726562, 141.75433349609375, 151.41574096679688, -20.665283203125, 112.78594970703125, -101.07855224609375, -77.30548095703125, 45.75872802734375, 36.542205810546875, 63.86199951171875, 11.719062805175781, 120.09434509277344, 254.22735595703125, 106.780029296875, 196.24581909179688, 79.66024780273438, 60.654388427734375, 44.85478973388672, -126.33193969726562, -185.71957397460938, 27.406494140625, 146.5235595703125, 36.92631530761719, -1.6157798767089844, -62.933746337890625, 49.7454833984375, -0.22340774536132812, -97.84429931640625, 60.9871826171875, -52.07208251953125, 20.104339599609375, 102.97552490234375, 68.98016357421875, 10.9578857421875, -44.03472137451172, 199.21678161621094, -108.68997955322266, 100.36310577392578, 1.4214096069335938, 10.027252197265625, 78.573486328125, 122.30213165283203, 7.0830841064453125, 20.638961791992188, 14.63787841796875, 42.722747802734375, -42.0684814453125, -183.39974975585938, 91.13286590576172, 51.57122802734375, -122.8623046875, 94.63516235351562, -91.31138610839844, -2.2610244750976562, 61.785614013671875, -21.729827880859375, 71.16604614257812, 196.99505615234375, 73.31373596191406, 123.80160522460938, -25.48028564453125, 8.464021682739258, 57.24615478515625, 12.85659408569336, 72.60990905761719, -33.606689453125, 362.7159118652344, 12.83905029296875, 146.98309326171875, -137.07321166992188, 78.77638244628906, 54.9521484375, -89.14450073242188, 174.008056640625, 45.43791198730469, 59.74896240234375, 159.95053100585938, 0.387664794921875, 23.676828384399414, 5.470451354980469, 61.11808776855469, 122.02093505859375, 56.39190673828125, 44.75276184082031, 3.2607192993164062, 177.2568359375, 44.13055419921875, -22.978504180908203, 43.97802734375, -14.8612060546875, 94.880859375, -64.79840087890625, 47.887725830078125, 114.64631652832031, 11.9837646484375, -18.92911148071289, 13.124992370605469, -78.8218994140625, 52.07781982421875, 0.508636474609375, -104.03199768066406, 44.86097717285156, 98.61317443847656, 21.73763656616211, -69.59858703613281, 72.86306762695312, -46.81401062011719, -21.444366455078125, 69.59228515625, 148.84971618652344, 34.056610107421875, -31.37950897216797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000176.npy"}
{"epoch": 0.36858638743455496, "step": 177, "batch_size": 128, "mean": 37.02976989746094, "std": 85.5105209350586, "min": -188.1947479248047, "p10": -59.00204162597655, "median": 30.80588150024414, "p90": 139.81331481933591, "max": 251.64398193359375, "pos_frac": 0.703125, "sample": [114.65863037109375, 6.523077011108398, 28.907058715820312, 179.46214294433594, -165.13174438476562, 243.73788452148438, 12.365615844726562, 138.67303466796875, 31.20464324951172, 84.99063110351562, 108.86897277832031, 73.17657470703125, 79.60362243652344, 77.89422607421875, -17.434906005859375, 15.710739135742188, 116.04244995117188, 82.99876403808594, -55.4935302734375, -31.159912109375, 72.39285278320312, -5.06689453125, 147.7213134765625, 18.651180267333984, 15.106842041015625, -126.45013427734375, 19.97760772705078, 52.485992431640625, 121.39667510986328, 71.6414794921875, 28.590599060058594, 154.5008544921875, -2.0918426513671875, 91.980224609375, 64.26622009277344, -33.754791259765625, 158.308837890625, 28.902292251586914, -7.741960525512695, 26.139617919921875, 32.624847412109375, 92.05381774902344, 40.59364318847656, 90.65692138671875, 251.64398193359375, 55.285491943359375, 41.85661315917969, 106.34637451171875, 30.407119750976562, 124.4698486328125, -25.651885986328125, 179.92059326171875, -16.391952514648438, -0.2264251708984375, 30.306915283203125, 142.47396850585938, -102.54443359375, 8.4444580078125, 27.909866333007812, 1.35845947265625, 183.1068115234375, 81.64813995361328, 185.01824951171875, 39.6298828125, 124.9542236328125, 8.334648132324219, 106.10333251953125, -73.38114929199219, 136.8196563720703, -18.399520874023438, 9.283693313598633, -24.641525268554688, -67.18856811523438, 34.18840026855469, -23.460800170898438, 134.84185791015625, 33.02349853515625, -17.419677734375, 97.78713989257812, -14.30810546875, -3.88360595703125, 43.86791229248047, -1.7431678771972656, 39.91587829589844, 42.24070739746094, -114.81625366210938, 1.801513671875, -91.15284729003906, 41.744232177734375, 18.482192993164062, 103.08462524414062, 130.736572265625, 117.56494140625, -1.710174560546875, -54.99176025390625, 54.150726318359375, -187.1529541015625, -51.24811553955078, -20.035316467285156, -47.711700439453125, -70.8804931640625, -50.42011260986328, 7.802663803100586, 17.60860824584961, 66.43081665039062, 106.73355102539062, 14.566436767578125, 56.872039794921875, -83.96295166015625, 246.798828125, 1.542633056640625, 1.443115234375, 110.09060668945312, -188.1947479248047, 197.29510498046875, 20.30438232421875, -170.37835693359375, -162.23648071289062, 40.36431121826172, 134.89788818359375, 40.590240478515625, 38.78926086425781, -10.3792724609375, -18.326080322265625, 2.10308837890625, 48.745574951171875, 62.609283447265625, 189.82376098632812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000177.npy"}
{"epoch": 0.3706806282722513, "step": 178, "batch_size": 128, "mean": 52.971351623535156, "std": 81.70726013183594, "min": -137.79461669921875, "p10": -45.548004150390625, "median": 40.539085388183594, "p90": 155.941145324707, "max": 358.34808349609375, "pos_frac": 0.71875, "sample": [111.60130310058594, 5.539272308349609, 9.552841186523438, 41.6484375, 16.54595947265625, 130.67813110351562, -62.03424072265625, 98.492431640625, 0.0, 146.90817260742188, 191.24859619140625, 116.723388671875, -7.352203369140625, 116.60821533203125, 117.6177978515625, 152.82037353515625, 34.7523193359375, 58.471885681152344, 61.97576904296875, 84.38003540039062, -44.30534362792969, -9.921951293945312, 3.5008544921875, 147.74462890625, 54.962432861328125, 23.532730102539062, 64.39556884765625, -84.4808349609375, -22.10871124267578, 7.905242919921875, 110.17727661132812, 9.989654541015625, 3.55194091796875, 3.4752979278564453, 90.63211059570312, 163.8211669921875, 143.225341796875, 211.162841796875, -45.5213623046875, 136.89364624023438, 116.94692993164062, 193.067138671875, 14.402908325195312, 150.77728271484375, -25.11175537109375, -137.79461669921875, -14.920761108398438, -7.9012908935546875, 70.92578125, 56.067718505859375, 146.14474487304688, 48.6759033203125, 22.61223602294922, -19.356971740722656, -29.686386108398438, 133.80572509765625, -77.96832275390625, -2.1113357543945312, 358.34808349609375, 68.18621826171875, 20.32099151611328, 135.00738525390625, 7.885322570800781, -10.8619384765625, -6.179157257080078, 71.6682357788086, 216.89178466796875, 49.371307373046875, 83.18405151367188, 14.387924194335938, 70.52420043945312, 114.02740478515625, 112.12359619140625, -103.950927734375, -7.1059417724609375, 170.5562744140625, 35.573974609375, 90.87281799316406, 91.01528930664062, 25.501617431640625, -5.2550201416015625, 10.481781005859375, -9.83469009399414, 216.46255493164062, 70.68197631835938, -53.240875244140625, -11.658500671386719, 169.91612243652344, 7.8173675537109375, 81.37646484375, 32.28202819824219, 33.01481246948242, 47.0469970703125, 33.285003662109375, 25.9256591796875, -30.6497802734375, 95.77798461914062, 82.24415588378906, -48.662750244140625, 178.15948486328125, 176.922607421875, 121.0185546875, -52.91096496582031, 8.877151489257812, -38.830047607421875, 141.44903564453125, -5.502164840698242, -109.13626861572266, -8.36629867553711, 153.59197998046875, 51.98396301269531, 82.39892578125, -54.78826904296875, 39.42973327636719, -110.92295837402344, 18.076568603515625, 130.85525512695312, -45.61016845703125, -2.0615005493164062, 182.56787109375, 30.27178955078125, 0.6730804443359375, 63.70917510986328, 132.68365478515625, -49.30743408203125, 81.76455688476562, 114.16763305664062, 161.4225311279297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000178.npy"}
{"epoch": 0.37277486910994767, "step": 179, "batch_size": 128, "mean": 42.62371826171875, "std": 78.98968505859375, "min": -164.4937744140625, "p10": -54.259265136718746, "median": 38.38934516906738, "p90": 146.80241088867186, "max": 227.18875122070312, "pos_frac": 0.734375, "sample": [85.59597778320312, 8.716903686523438, 102.19940185546875, -1.60101318359375, 2.3990402221679688, -60.50225830078125, 54.513092041015625, 61.656463623046875, 74.32235717773438, 69.32015991210938, 37.3734130859375, -11.35562515258789, 76.94805908203125, -138.87908935546875, -17.881561279296875, -79.83859252929688, -4.578266143798828, 50.18475341796875, 227.18875122070312, -141.14056396484375, 56.234832763671875, 99.655029296875, 44.511260986328125, 31.385345458984375, 6.531208038330078, 24.31842041015625, 126.71942138671875, 1.8692855834960938, -122.93666076660156, 169.99667358398438, -67.04849243164062, -23.35254669189453, 58.674591064453125, 115.94393920898438, 40.99908447265625, 26.30693817138672, 79.08966064453125, 73.7022705078125, 63.5020751953125, 13.486469268798828, 48.2626953125, 75.685302734375, 2.273712158203125, 99.82557678222656, 39.405277252197266, 11.442367553710938, -23.197925567626953, 176.67623901367188, 20.20257568359375, 60.28173828125, 25.42105484008789, 68.9166259765625, 179.55487060546875, 62.012237548828125, 113.00872802734375, 46.4951171875, 140.504150390625, -22.91925048828125, 216.7582550048828, 171.2843017578125, 85.47466278076172, 109.53944396972656, -13.88409423828125, -5.15234375, -47.323028564453125, 13.990966796875, 99.44580078125, -26.990646362304688, -67.93157958984375, -52.901336669921875, 34.89607238769531, 94.78085327148438, 33.3984375, 7.100109100341797, -50.926605224609375, 1.5715808868408203, 158.63601684570312, 185.5777130126953, 105.36688232421875, -57.427764892578125, 97.75628662109375, 65.33544921875, 25.32262420654297, 58.03801727294922, -164.4937744140625, 145.54632568359375, -130.62730407714844, 13.625350952148438, 30.906368255615234, -84.52926635742188, 139.59649658203125, 2.038005828857422, 192.98040771484375, 134.47607421875, -5.54742431640625, 3.9792404174804688, 149.7332763671875, -3.4708251953125, 185.19580078125, -47.962493896484375, -11.985687255859375, 55.102020263671875, 102.27972412109375, 4.482385635375977, 41.664695739746094, -99.04777526855469, 108.78441619873047, 2.9463424682617188, -72.926025390625, -14.79803466796875, 188.27557373046875, 40.31404113769531, 123.04141235351562, 22.041473388671875, 48.893470764160156, 31.612350463867188, 2.899425506591797, 119.03947448730469, 9.586433410644531, 89.82907104492188, 207.8306884765625, 12.815032958984375, 60.88043212890625, -7.9599609375, -2.57354736328125, -22.801513671875, 135.14146423339844, 139.20481872558594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000179.npy"}
{"epoch": 0.374869109947644, "step": 180, "batch_size": 128, "mean": 54.108856201171875, "std": 79.96065521240234, "min": -115.32037353515625, "p10": -30.50534477233886, "median": 34.20249938964844, "p90": 156.33385620117187, "max": 453.0795593261719, "pos_frac": 0.7421875, "sample": [-0.8133583068847656, -5.228324890136719, -39.314361572265625, -2.1044082641601562, 117.30377197265625, 100.95562744140625, 197.43734741210938, 17.444503784179688, -21.06951904296875, 75.68548583984375, 81.36630249023438, -87.30874633789062, 0.59942626953125, 34.782684326171875, 10.600296020507812, -37.97563171386719, 155.23104858398438, -15.56866455078125, 5.169975280761719, 33.622314453125, 60.22747802734375, 100.22454833984375, -9.322565078735352, 39.6044921875, 180.80181884765625, 89.7784652709961, -41.23052978515625, 61.26300048828125, 8.573043823242188, 2.0123291015625, 108.42117309570312, 19.68731689453125, 24.269546508789062, -19.196945190429688, 148.1685791015625, 25.5948486328125, 159.26345825195312, 23.373931884765625, 0.8857994079589844, 48.61659240722656, 62.430450439453125, 139.79275512695312, 58.0506477355957, 63.6761474609375, -3.592498779296875, 26.65545654296875, 99.569091796875, 22.92535400390625, -86.69247436523438, 130.89480590820312, 100.3795166015625, -63.72749328613281, -26.09528350830078, 127.61590576171875, 132.91650390625, 79.51380157470703, 120.45545959472656, 91.48856353759766, 15.136940002441406, -77.67637634277344, 41.29193115234375, 3.903900146484375, -52.95001220703125, 102.72732543945312, 16.216705322265625, 168.6940460205078, -3.183868408203125, 131.86495971679688, 117.46376037597656, -11.274795532226562, 105.97294616699219, 10.680593490600586, 41.12921142578125, 97.11968994140625, 23.741302490234375, 177.74996948242188, 101.61202239990234, 67.7319107055664, 106.32003784179688, 87.47892761230469, 32.30780792236328, 158.90707397460938, 173.1334686279297, 32.115509033203125, 7.175912857055664, -9.564632415771484, 19.182193756103516, -15.619110107421875, 18.903396606445312, 130.25729370117188, -12.649200439453125, 124.22515869140625, 11.552709579467773, 110.33026123046875, 26.87176513671875, 27.370162963867188, 143.17349243164062, -115.32037353515625, -35.86138916015625, 165.47442626953125, 173.11712646484375, 74.85574340820312, -82.39901733398438, 166.4258575439453, 137.6961669921875, -35.80185317993164, 8.821273803710938, 41.19548034667969, 176.4781494140625, 80.60107421875, -53.25604248046875, 143.5684814453125, -9.27493667602539, -4.348140716552734, 129.85784912109375, 51.28509521484375, 453.0795593261719, 15.424324035644531, -28.23541259765625, 10.673625946044922, -3.911029815673828, 57.577239990234375, 83.30924987792969, 100.23481750488281, -23.417144775390625, 268.3577880859375, 12.06134033203125, -9.84893798828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000180.npy"}
{"epoch": 0.3769633507853403, "step": 181, "batch_size": 128, "mean": 41.12602996826172, "std": 84.2320327758789, "min": -171.00738525390625, "p10": -64.52637023925782, "median": 39.09461975097656, "p90": 150.9046905517578, "max": 279.9373779296875, "pos_frac": 0.6875, "sample": [-4.543373107910156, 3.3966064453125, -67.22372436523438, -27.606414794921875, 18.299367904663086, 158.96864318847656, 24.8955078125, 70.09770202636719, 0.0, 124.5330810546875, 41.66804885864258, -63.370361328125, 224.54656982421875, -61.139671325683594, 59.586090087890625, 34.8553466796875, 37.16337585449219, 4.82977294921875, -88.98428344726562, 159.00392150878906, 48.2601318359375, 109.29222106933594, 66.950927734375, 53.4244384765625, -15.244583129882812, 47.3135986328125, -23.893404006958008, -79.60264587402344, 62.722816467285156, -5.7310791015625, 63.834259033203125, 154.42774963378906, 0.0, 47.92945861816406, 172.68753051757812, 33.82672119140625, 157.69473266601562, 34.69200134277344, 46.889801025390625, 92.47201538085938, 139.8625946044922, 36.504791259765625, 186.3941650390625, 102.3519287109375, 20.321470260620117, 112.133056640625, -68.59848022460938, -140.73843383789062, 15.250030517578125, 150.09121704101562, 58.882598876953125, 70.71881103515625, 279.9373779296875, -41.59423828125, 98.75933837890625, 94.73788452148438, -30.585861206054688, 153.1832275390625, 56.4471435546875, -13.948928833007812, -32.487144470214844, 21.81134033203125, 35.90266418457031, 16.942169189453125, 49.196197509765625, 5.9662322998046875, 61.596923828125, -14.0887451171875, 11.846992492675781, 126.77645874023438, -128.10903930664062, -5.429510116577148, -131.3051300048828, 167.12367248535156, -98.81631469726562, 138.49070739746094, -24.546119689941406, 21.57537841796875, 9.50531005859375, 58.495819091796875, 49.66499328613281, 115.81796264648438, 264.279296875, 226.6123046875, -34.7386474609375, -44.319252014160156, 41.02586364746094, 134.88275146484375, -1.92596435546875, -11.59307861328125, -10.542205810546875, 63.36883544921875, 45.03562927246094, 93.4315185546875, 91.1475830078125, -103.68896484375, 139.63937377929688, -2.5844650268554688, 152.80279541015625, 19.645965576171875, 47.33673095703125, 124.43472290039062, 59.6951904296875, -106.28546142578125, -4.373695373535156, 18.290496826171875, 139.34539794921875, -10.297470092773438, 32.962646484375, 3.8918685913085938, -36.88873291015625, 94.93562316894531, -171.00738525390625, 76.06074523925781, -51.265045166015625, 43.29412841796875, -55.37548828125, -149.7479248046875, 119.10385131835938, 31.4375, 132.11209106445312, -101.9876708984375, 34.40965270996094, 70.465087890625, 94.86172485351562, 82.74179077148438, 108.28287506103516, 122.25593566894531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000181.npy"}
{"epoch": 0.37905759162303665, "step": 182, "batch_size": 128, "mean": 58.74577331542969, "std": 75.47310638427734, "min": -150.49676513671875, "p10": -24.12133102416992, "median": 52.325286865234375, "p90": 163.26941375732417, "max": 266.2485656738281, "pos_frac": 0.7890625, "sample": [57.708099365234375, 89.43009948730469, 66.77206420898438, -1.447122573852539, 53.0643310546875, 73.79489135742188, 19.576133728027344, 7.420799255371094, 81.49773406982422, 102.1226806640625, 80.06732177734375, 66.9207763671875, 150.43051147460938, 179.744140625, 107.70697021484375, 221.63490295410156, 180.37905883789062, -23.244606018066406, 113.69155883789062, 103.33489990234375, 139.78329467773438, 157.4510040283203, 47.7747802734375, 90.58305358886719, 17.00775146484375, 59.84480285644531, -4.8012847900390625, -47.4732666015625, 204.884033203125, -32.45633316040039, -26.167022705078125, 0.711761474609375, 48.11286926269531, 33.95066833496094, 186.39736938476562, 89.4364013671875, -49.934051513671875, -40.245941162109375, 55.05541229248047, -8.878173828125, 2.419189453125, 11.216018676757812, 65.25234985351562, 246.91925048828125, 136.8404541015625, 266.2485656738281, 124.49935913085938, -17.9007568359375, 182.57608032226562, 5.946819305419922, -133.3042449951172, 9.308792114257812, -42.7283935546875, 4.41650390625, 3.0779190063476562, 115.50745391845703, -16.548858642578125, 0.7388038635253906, 100.83909606933594, -8.589942932128906, 16.12512969970703, 145.70455932617188, 67.66213989257812, -5.2658233642578125, 54.47319030761719, -21.153900146484375, 99.91281127929688, 82.3050537109375, -30.505126953125, 104.31204223632812, -150.49676513671875, 30.554122924804688, 1.271087646484375, 155.931640625, 12.815292358398438, 96.63410949707031, 31.097227096557617, 39.452003479003906, 30.28839111328125, 78.00634765625, 51.58624267578125, 37.434593200683594, -0.793609619140625, -16.6702880859375, 4.928466796875, 55.89446258544922, 64.66973876953125, 30.38812255859375, 64.23233032226562, 10.801239013671875, 104.7581787109375, 95.01043701171875, -7.07763671875, 125.68865966796875, 184.1734619140625, -31.78057861328125, 49.0526123046875, 65.8897705078125, 56.693084716796875, 27.8359375, 203.02301025390625, 3.4850616455078125, 134.46673583984375, -72.65916442871094, 210.25445556640625, 130.00466918945312, 111.41081237792969, 17.057540893554688, 77.04086303710938, 39.035888671875, 87.26141357421875, 115.01641845703125, 83.30087280273438, 40.47793197631836, 3.311370849609375, -5.69049072265625, 176.845703125, 41.91986083984375, 8.411712646484375, 123.11763000488281, 129.40777587890625, 146.83535766601562, -33.7451171875, -11.430320739746094, 215.61859130859375, 33.8853759765625, 50.6318359375, -59.05259704589844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000182.npy"}
{"epoch": 0.381151832460733, "step": 183, "batch_size": 128, "mean": 50.40031433105469, "std": 83.63432312011719, "min": -130.59100341796875, "p10": -57.959014892578125, "median": 42.318389892578125, "p90": 164.66685791015624, "max": 337.454833984375, "pos_frac": 0.75, "sample": [-55.98944091796875, 139.39263916015625, -21.168899536132812, 80.07296752929688, 92.52192687988281, 48.482269287109375, 26.144023895263672, 128.037841796875, -18.461936950683594, 212.93414306640625, 26.600341796875, -50.49114990234375, 165.19390869140625, 89.00855255126953, 5.50421142578125, 113.07659912109375, 74.109375, 11.6910400390625, 66.40200805664062, 94.70318603515625, 129.7346649169922, 83.631103515625, 65.39703369140625, -7.2614288330078125, 57.52996826171875, 46.98976135253906, 27.417015075683594, 0.17609596252441406, 92.31719970703125, -0.4776611328125, 21.191650390625, -101.99345397949219, -71.59710693359375, 184.12570190429688, -86.9505615234375, 43.81353759765625, 92.5347900390625, 147.81277465820312, 45.8087158203125, 63.20252990722656, 14.230606079101562, 21.57501220703125, 74.58111572265625, 41.618804931640625, 105.11103820800781, -50.35552978515625, 0.0, 25.458173751831055, 143.91506958007812, -15.925704956054688, 51.30279541015625, 164.44097900390625, -84.28001403808594, 14.973892211914062, -49.03521728515625, 144.9877166748047, 12.067432403564453, 103.54849243164062, 43.017974853515625, 189.89620971679688, 34.67840576171875, 26.094451904296875, -71.12432861328125, 194.00714111328125, 150.36569213867188, 9.814422607421875, -57.144927978515625, 41.045135498046875, 337.454833984375, 11.151199340820312, 81.26002502441406, -82.7044677734375, 62.70713806152344, -106.83062744140625, 119.5973892211914, 64.22109985351562, 17.574066162109375, -61.61308288574219, -43.27806854248047, 74.483154296875, 173.20303344726562, 34.7593994140625, 20.086456298828125, 28.87923812866211, 56.57526397705078, -31.21197509765625, 0.04148101806640625, 186.86387634277344, 31.638473510742188, 43.33160400390625, 134.53878784179688, -7.0629425048828125, -0.507537841796875, 61.25639343261719, 87.24661254882812, 85.31338500976562, 203.90740966796875, 8.237213134765625, -18.08673095703125, -112.98980712890625, 39.799957275390625, 104.77709197998047, 176.21441650390625, -28.921096801757812, 20.9443359375, 78.06086730957031, -12.915191650390625, -130.59100341796875, 192.54653930664062, 205.7506561279297, 2.0114078521728516, 149.74722290039062, -67.44093322753906, 24.641876220703125, 28.113525390625, 44.25315856933594, -25.42481231689453, 280.8691711425781, 89.71446228027344, 143.20993041992188, 11.23482894897461, 9.31781005859375, -59.858551025390625, 70.79786682128906, -63.991363525390625, 116.52813720703125, 106.38108825683594, 145.39532470703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000183.npy"}
{"epoch": 0.3832460732984293, "step": 184, "batch_size": 128, "mean": 66.96247863769531, "std": 83.58285522460938, "min": -138.19000244140625, "p10": -26.19754486083984, "median": 59.872806549072266, "p90": 171.05941772460938, "max": 310.09033203125, "pos_frac": 0.796875, "sample": [123.82595825195312, 248.78277587890625, 262.0005798339844, -5.5314483642578125, 41.75640869140625, -25.42028045654297, 159.81439208984375, 310.09033203125, 211.4727783203125, 126.16773986816406, 45.715484619140625, 126.76255798339844, 5.8199005126953125, 119.083740234375, 46.766929626464844, -88.020263671875, 95.01593017578125, 92.63818359375, 137.5869140625, 133.98379516601562, -20.447463989257812, 21.914566040039062, 4.78509521484375, 130.88430786132812, 182.84466552734375, 44.52447509765625, 21.30645751953125, 121.36328125, 146.42755126953125, 74.94766235351562, 121.17184448242188, -22.5838623046875, 80.19142150878906, -31.804046630859375, 80.03338623046875, -8.497787475585938, 8.7115478515625, 79.44197082519531, 102.12152099609375, 130.1738739013672, 74.97314453125, 265.86395263671875, -138.19000244140625, 87.81304931640625, 76.1932373046875, 17.830581665039062, -48.048553466796875, 27.6036376953125, 63.952392578125, 103.01983642578125, 13.5997314453125, 27.1961669921875, 35.128684997558594, 128.16519165039062, 50.991943359375, 178.4638671875, -10.452713012695312, -111.44511413574219, 90.6097412109375, 77.76046752929688, 113.12901306152344, -0.036376953125, -77.46635437011719, 57.827964782714844, 102.3577880859375, 190.39556884765625, 22.261215209960938, 62.5765380859375, 56.771942138671875, 165.1375732421875, -63.46710205078125, 42.33699035644531, -80.99604797363281, 171.056396484375, 113.6353759765625, 25.318038940429688, 141.82345581054688, -1.08453369140625, 33.921661376953125, 203.6038818359375, 16.54570770263672, 100.88306427001953, 189.10052490234375, 14.655601501464844, 8.933036804199219, 7.655723571777344, 129.24639892578125, 95.65634155273438, 14.651290893554688, 45.965118408203125, -26.935943603515625, 132.8865966796875, 22.814361572265625, -72.45913696289062, 163.31878662109375, 15.40545654296875, 7.033355712890625, 45.217018127441406, -15.350723266601562, 13.512542724609375, 136.4307861328125, 183.7210693359375, -67.6531982421875, 68.1973876953125, -30.16309356689453, 124.163330078125, 27.987586975097656, 120.65374755859375, 37.694061279296875, 3.731975555419922, 146.85406494140625, -103.46240234375, 35.13763427734375, 127.11053466796875, 148.43394470214844, 105.69338989257812, -20.695030212402344, 166.09642028808594, 58.73657989501953, -11.091644287109375, 113.042724609375, 25.09571075439453, 61.009033203125, -0.6362686157226562, 247.96047973632812, 171.06646728515625, -25.881088256835938, 87.2724609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000184.npy"}
{"epoch": 0.38534031413612563, "step": 185, "batch_size": 128, "mean": 53.508567810058594, "std": 86.5050048828125, "min": -161.01730346679688, "p10": -42.797471618652345, "median": 46.470062255859375, "p90": 167.91369018554687, "max": 339.20867919921875, "pos_frac": 0.71875, "sample": [62.55926513671875, -61.672088623046875, 123.4853515625, -49.820037841796875, 84.05632781982422, 17.95562744140625, 3.8871002197265625, 106.90908813476562, 72.16412353515625, -47.9296875, 67.74107360839844, 136.02362060546875, 33.53802490234375, 210.34912109375, -14.582199096679688, -24.530960083007812, 51.61000061035156, 141.77857971191406, 15.314498901367188, 59.641845703125, -39.23272705078125, 29.7069091796875, 111.90526580810547, 113.40109252929688, 171.39291381835938, -12.694992065429688, 54.851593017578125, 0.0, 29.945648193359375, -126.2694091796875, 198.78155517578125, -76.3623046875, 17.756637573242188, 46.42486572265625, 109.8596420288086, 233.6702880859375, 58.3101806640625, 111.90303039550781, 99.673828125, 213.96701049804688, 153.74256896972656, 53.42266845703125, 113.64555358886719, 118.12834167480469, -101.16360473632812, -31.04766845703125, 27.879180908203125, -2.331125259399414, 145.236572265625, -7.120542526245117, -43.7763671875, 129.5635986328125, -4.257537841796875, 48.48411560058594, 60.09637451171875, -50.32562255859375, 51.481658935546875, 31.11037826538086, 168.8831329345703, -41.357757568359375, 100.407470703125, 174.48202514648438, 37.46427917480469, -116.42742919921875, 191.0247802734375, -13.196693420410156, 121.01055908203125, 49.742149353027344, 51.25286865234375, 92.939453125, 5.516593933105469, 157.93142700195312, 22.055450439453125, 46.5247802734375, 62.966583251953125, 24.52859878540039, 133.8231201171875, 0.0, 1.9306259155273438, 6.692657470703125, 55.856781005859375, 86.33148193359375, -18.953975677490234, 4.341705322265625, 246.32611083984375, -13.182723999023438, 20.020797729492188, 7.881111145019531, 209.81536865234375, 20.050567626953125, 159.56878662109375, 68.7779541015625, 115.42984771728516, 41.93058776855469, -10.790557861328125, -42.37794494628906, 71.28173828125, 121.0457992553711, 46.08207702636719, 339.20867919921875, -7.70208740234375, 133.69287109375, 24.171844482421875, 17.43988800048828, 10.126272201538086, 144.061279296875, -161.01730346679688, 50.82018280029297, -22.284011840820312, -68.6644058227539, 22.82677459716797, 3.3327560424804688, -100.31869506835938, 40.04461669921875, -5.50621223449707, 167.4982147216797, 118.61697387695312, 282.4512939453125, 70.91534423828125, 59.58385467529297, -95.83123779296875, -16.917133331298828, 46.5152587890625, 246.20504760742188, -9.576316833496094, -11.352325439453125, -1.1305694580078125, 76.01708984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000185.npy"}
{"epoch": 0.387434554973822, "step": 186, "batch_size": 128, "mean": 45.68077087402344, "std": 79.802490234375, "min": -157.68783569335938, "p10": -41.45546264648437, "median": 39.3856201171875, "p90": 148.48627319335938, "max": 284.6292724609375, "pos_frac": 0.71875, "sample": [87.92044067382812, 90.7802734375, 162.50350952148438, 64.67730712890625, 106.4400634765625, 12.279541015625, -31.77374267578125, -144.97882080078125, 108.82025146484375, 1.2537651062011719, 193.556396484375, -4.9565582275390625, 63.784156799316406, 69.89108276367188, 59.59831237792969, 141.8323974609375, 116.19449615478516, -25.5435791015625, 178.15185546875, 7.714630126953125, 18.90032196044922, 128.19607543945312, -157.68783569335938, -22.441965103149414, 8.589340209960938, -31.051605224609375, 150.554443359375, 249.8271484375, 32.69818115234375, 22.82897186279297, 176.73489379882812, -0.2815418243408203, -11.884063720703125, 142.82559204101562, 120.9896240234375, -12.62457275390625, 38.62396240234375, 79.84036254882812, 12.2330322265625, 70.260986328125, 40.14727783203125, -59.4461669921875, -65.04486083984375, 23.588897705078125, 160.0269775390625, 3.75421142578125, -37.24122619628906, -29.184219360351562, 37.5694580078125, 73.61627197265625, 68.8192138671875, 44.85845947265625, 54.23054504394531, 147.59991455078125, -52.289154052734375, 7.3469390869140625, 61.2017822265625, -68.19129943847656, 42.90032958984375, -21.848533630371094, 284.6292724609375, 64.63687896728516, -66.05201721191406, 140.6107177734375, 35.604705810546875, 34.526920318603516, 43.530731201171875, 82.50314331054688, 29.039466857910156, 41.3590087890625, 84.40471649169922, 152.77015686035156, 22.280773162841797, 96.5198974609375, 107.349609375, 73.39620971679688, 61.900482177734375, 80.16465759277344, 8.636016845703125, -7.728118896484375, 142.71551513671875, 123.55204010009766, 35.27386474609375, 85.48056030273438, 198.5654296875, 28.34912109375, 29.497833251953125, 42.180137634277344, -100.16293334960938, -113.7916259765625, -32.895172119140625, 7.52911376953125, -12.752197265625, -21.670082092285156, -1.8147430419921875, -9.976425170898438, -13.5887451171875, 104.6217041015625, -64.74807739257812, 28.16961669921875, 115.11966705322266, 107.5426025390625, -45.38104248046875, 5.647529602050781, -3.549224853515625, -39.7730712890625, 87.51976776123047, 74.83184814453125, -13.6951904296875, -24.963531494140625, 135.05914306640625, -115.55453491210938, 54.54625701904297, -113.23886108398438, 2.041177749633789, 234.93865966796875, 63.77241516113281, 64.41211700439453, 31.87824249267578, -24.7850341796875, 114.31936645507812, 0.2757110595703125, 171.05859375, 207.00701904296875, 62.3621826171875, 13.087142944335938, 98.36611938476562, 51.9832763671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000186.npy"}
{"epoch": 0.38952879581151834, "step": 187, "batch_size": 128, "mean": 42.4754638671875, "std": 90.03567504882812, "min": -269.5294189453125, "p10": -65.80813598632811, "median": 42.163978576660156, "p90": 158.09309997558594, "max": 277.5593566894531, "pos_frac": 0.6640625, "sample": [138.45462036132812, 174.01844787597656, 95.7928466796875, 106.27053833007812, 47.23846435546875, -92.88662719726562, -29.573898315429688, 25.015724182128906, 115.80361938476562, 55.266380310058594, 149.1524658203125, 97.1265869140625, -74.93951416015625, 49.36328125, 0.41644287109375, 91.02326965332031, 122.11647033691406, 119.91873168945312, -34.57258605957031, -252.927734375, -14.930938720703125, 3.770904541015625, -41.24882507324219, 113.71224212646484, 42.088623046875, 41.88838195800781, -39.411903381347656, -18.3687744140625, -11.246002197265625, 91.57305145263672, 7.222295761108398, 41.66717529296875, -269.5294189453125, 55.926910400390625, 205.49227905273438, -7.846893310546875, -15.57720947265625, -2.537139892578125, 52.63880920410156, 135.23388671875, 140.07391357421875, 14.013427734375, 109.8057861328125, -54.0889892578125, -18.737510681152344, 142.36669921875, -62.20526123046875, -79.05438232421875, 58.874603271484375, 75.5081787109375, 110.65261840820312, -21.535396575927734, -30.29583740234375, 40.16473388671875, 155.4150390625, 114.90655517578125, 22.873062133789062, -30.472694396972656, 25.206451416015625, 23.792160034179688, 95.74893188476562, -1.6102027893066406, 23.407123565673828, 42.23933410644531, -97.55093383789062, 193.61395263671875, 183.2535400390625, 9.6405029296875, 63.49835205078125, -49.773895263671875, 140.7149658203125, -53.19453430175781, -19.235137939453125, -11.132110595703125, 158.68502807617188, -101.85711669921875, 89.01327514648438, 46.19364929199219, 36.203304290771484, 68.78985595703125, -15.898130416870117, -20.665618896484375, 68.68899536132812, -120.11300659179688, -9.050741195678711, 142.3095703125, 11.122833251953125, -16.986297607421875, -157.1544189453125, 184.91070556640625, 116.41232299804688, 170.59512329101562, -74.21484375, 111.01779174804688, 69.1866455078125, -45.77379608154297, -5.11419677734375, 47.82373046875, 173.11647033691406, 114.44589233398438, -10.675537109375, 140.18174743652344, 166.2977294921875, 14.603759765625, -85.33123779296875, -101.72406005859375, 36.69139099121094, 171.94149780273438, 105.37321472167969, 37.38299560546875, 137.84365844726562, -90.59016418457031, 197.1692352294922, 48.011932373046875, 51.635528564453125, 45.705078125, 58.86663818359375, 62.54669189453125, 67.95230102539062, 89.63116455078125, 178.82278442382812, 54.183326721191406, 32.450958251953125, 277.5593566894531, 0.0, 157.83941650390625, -18.8812255859375, 40.20648193359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000187.npy"}
{"epoch": 0.39162303664921466, "step": 188, "batch_size": 128, "mean": 49.235084533691406, "std": 96.7362289428711, "min": -178.86273193359375, "p10": -76.4723907470703, "median": 40.45985412597656, "p90": 168.67115936279296, "max": 363.6944580078125, "pos_frac": 0.703125, "sample": [8.961944580078125, 70.6253662109375, 42.8333740234375, 0.0, 116.34510803222656, -82.38348388671875, 48.7572021484375, 53.0067138671875, 163.65509033203125, 42.4302978515625, -6.218544006347656, 161.7022705078125, 96.55712890625, 16.44091796875, 91.46685791015625, 108.85943603515625, 25.191329956054688, 3.666351318359375, 50.53102111816406, 146.56166076660156, -0.23143768310546875, -143.19589233398438, 64.17306518554688, 38.489410400390625, -74.73983764648438, 77.2467041015625, 72.26824188232422, 139.0238037109375, 50.92915344238281, 31.469375610351562, 144.283447265625, -39.155517578125, 53.133689880371094, 2.680450439453125, 28.504852294921875, 308.6523132324219, 111.59817504882812, 163.7879638671875, 102.35127258300781, 33.86311340332031, 252.58172607421875, 183.54281616210938, -123.70346069335938, 22.49837875366211, -133.4873809814453, -34.97039794921875, 73.66384887695312, 233.04046630859375, 118.9751205444336, 172.0510711669922, 12.946044921875, 167.22262573242188, 60.52392578125, 29.95550537109375, -40.618385314941406, -131.00595092773438, 71.44197082519531, -1.1536808013916016, 45.43962097167969, 119.26722717285156, 163.79017639160156, -14.505722045898438, 7.850311279296875, -85.34210205078125, 9.383211135864258, 141.94952392578125, 103.08612060546875, 21.890838623046875, 29.0411376953125, 62.886932373046875, 3.1573867797851562, 150.0186767578125, -29.920257568359375, -149.05078125, -19.4805908203125, -7.96099853515625, 175.39801025390625, 60.08268737792969, 134.6109619140625, 363.6944580078125, 175.90402221679688, -16.20492935180664, 16.221710205078125, 24.715850830078125, 185.021728515625, 9.3370361328125, 102.92341613769531, 14.915019989013672, 107.4112548828125, -18.50006103515625, -178.86273193359375, 176.169189453125, -42.95208740234375, -103.66714477539062, 0.0, 53.829742431640625, 151.18276977539062, -28.88959503173828, 16.717529296875, 21.188690185546875, 223.26483154296875, -97.5833740234375, 65.94403076171875, 112.915283203125, -80.5150146484375, 143.6602020263672, 139.70970153808594, -7.227783203125, 180.6451416015625, -2.2265090942382812, 27.72411346435547, 148.11293029785156, 79.10317993164062, 152.32125854492188, -20.990829467773438, -60.37681579589844, 239.3442840576172, -18.933029174804688, -96.62152099609375, 5.879838943481445, -130.6599578857422, -57.2984619140625, 61.3575439453125, -43.21673583984375, 53.10882568359375, 112.78558349609375, 31.499603271484375, -69.00994873046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000188.npy"}
{"epoch": 0.393717277486911, "step": 189, "batch_size": 128, "mean": 62.302391052246094, "std": 83.8476333618164, "min": -148.95140075683594, "p10": -26.02607078552246, "median": 49.30169677734375, "p90": 165.32469787597654, "max": 393.3802490234375, "pos_frac": 0.8046875, "sample": [83.19070434570312, 70.44097900390625, -116.3177490234375, 34.31809997558594, 148.79319763183594, 132.40740966796875, 23.62909698486328, 104.37848663330078, 185.69419860839844, -26.042722702026367, 88.05804443359375, 68.88360595703125, 196.48074340820312, -18.929183959960938, -59.3189697265625, -107.4000244140625, 7.595458984375, 21.332794189453125, 137.0633087158203, 38.46058654785156, 393.3802490234375, 125.37887573242188, 111.33709716796875, 237.34336853027344, 16.560577392578125, 57.4658203125, 37.424957275390625, -32.61395263671875, 59.33184814453125, 3.8027725219726562, 20.31683349609375, -90.23020935058594, 30.4058837890625, -49.865325927734375, 12.707687377929688, 121.6884765625, 39.23162078857422, 3.9884033203125, 27.832260131835938, 74.8328628540039, 151.56631469726562, 178.71868896484375, -15.434860229492188, 114.40576171875, -71.26144409179688, 136.2109375, 23.21319580078125, 32.82586669921875, 10.363929748535156, 92.7287826538086, 54.09453582763672, 11.704719543457031, 34.064697265625, 126.36178588867188, 119.17964172363281, 103.80510711669922, 101.61093139648438, -0.319000244140625, 163.53915405273438, -148.95140075683594, 45.56487274169922, 81.39945983886719, 110.17428588867188, 28.92015838623047, 111.7671127319336, 95.4024658203125, 80.93763732910156, 98.25900268554688, 2.9321117401123047, 150.4891357421875, 136.61424255371094, 61.96575927734375, 50.20806884765625, 37.47370910644531, 48.39532470703125, 55.616607666015625, 280.79901123046875, 19.395523071289062, 11.235618591308594, 42.73838806152344, -10.891487121582031, 90.68896484375, -0.759674072265625, 2.57080078125, 24.770912170410156, 0.0, 53.7117919921875, 200.0283203125, -6.316230773925781, 1.484964370727539, -0.3121337890625, 225.25454711914062, 108.879150390625, 214.01019287109375, -7.286170959472656, 198.637451171875, 173.40829467773438, 45.254852294921875, 144.55361938476562, 146.9639892578125, 271.556396484375, 87.89712524414062, 15.67706298828125, -39.272216796875, 111.61483001708984, 8.074256896972656, -62.62493896484375, 67.81466674804688, 131.26202392578125, 58.63493347167969, 132.21676635742188, -46.67881774902344, 33.942230224609375, 6.697120666503906, 139.65966796875, 91.2061767578125, 10.6845703125, 169.490966796875, 65.52763366699219, 15.867401123046875, 101.65219116210938, -102.5760498046875, 69.31945037841797, 0.0, 43.47076416015625, 37.89198303222656, -4.68768310546875, -26.01893424987793], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000189.npy"}
{"epoch": 0.3958115183246073, "step": 190, "batch_size": 128, "mean": 53.396202087402344, "std": 80.17707824707031, "min": -141.3524627685547, "p10": -40.27691040039061, "median": 42.92799377441406, "p90": 164.5525421142578, "max": 278.2977600097656, "pos_frac": 0.765625, "sample": [-18.71587371826172, 43.669830322265625, 144.88150024414062, 32.10467529296875, -15.247947692871094, 30.8857421875, 130.23782348632812, -54.149261474609375, 38.5093994140625, -26.286224365234375, 61.78593444824219, -65.34933471679688, 132.05929565429688, 156.22018432617188, 66.74895477294922, -23.810821533203125, 76.719482421875, 64.29562377929688, 73.0328369140625, 13.516387939453125, 101.41217041015625, -36.7392578125, 184.86392211914062, -28.672409057617188, 19.561798095703125, 2.6039581298828125, 69.70979309082031, 105.52641296386719, 13.40867805480957, 16.1956787109375, 25.321533203125, 29.63470458984375, 20.411712646484375, 15.91963005065918, -77.58432006835938, -11.594158172607422, 199.67044067382812, 21.50519561767578, 175.79649353027344, 93.71994018554688, 2.2928504943847656, -1.6213569641113281, -71.92179107666016, 186.96408081054688, 87.05797576904297, -9.65643310546875, -24.604095458984375, 74.85450744628906, 110.80471801757812, 105.28553771972656, 100.78058624267578, 75.19062805175781, 109.93585205078125, 71.808837890625, 54.956146240234375, 143.74761962890625, 128.4979248046875, 29.6395263671875, 0.8333892822265625, 36.738311767578125, 81.75738525390625, 89.50172424316406, -53.51220703125, 35.274261474609375, -141.3524627685547, 45.710693359375, 190.02357482910156, -48.85685729980469, 180.11920166015625, -115.73361206054688, -27.114654541015625, 256.2091979980469, 137.89755249023438, -48.53143310546875, 169.4617919921875, -13.445449829101562, 6.858699798583984, 18.722946166992188, 12.13714599609375, -8.052627563476562, 44.859130859375, 0.0, 56.41351318359375, 216.23301696777344, 12.27587890625, 133.3485107421875, 24.305191040039062, 3.9385986328125, 51.25807189941406, 21.343048095703125, 8.544754028320312, 278.2977600097656, -9.220184326171875, 5.219449996948242, 52.776466369628906, 116.39112854003906, -50.1875, 113.89276123046875, 76.07183837890625, 40.24004364013672, 97.53553009033203, 36.348358154296875, 100.17366790771484, 129.21499633789062, 28.89398956298828, 122.77215576171875, 234.39248657226562, -64.67814636230469, -18.11920166015625, 42.1861572265625, 55.99554443359375, 62.36027526855469, -20.290145874023438, 114.67040252685547, 224.0045166015625, 109.99263000488281, 47.668304443359375, 55.43743896484375, 4.8900604248046875, 192.81134033203125, 162.44857788085938, 156.69757080078125, 1.2594642639160156, -123.91708374023438, 4.976531982421875, 128.0589599609375, -80.69912719726562, 53.187469482421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000190.npy"}
{"epoch": 0.39790575916230364, "step": 191, "batch_size": 128, "mean": 53.573387145996094, "std": 88.1821060180664, "min": -208.60311889648438, "p10": -36.850196075439456, "median": 38.72075271606445, "p90": 164.0312042236328, "max": 348.642822265625, "pos_frac": 0.7265625, "sample": [191.35394287109375, 164.4888916015625, 45.26849365234375, -5.8730621337890625, -57.15824890136719, 119.42083740234375, 10.815673828125, -118.54302978515625, 24.998027801513672, 134.60006713867188, 15.864568710327148, 75.59913635253906, 44.823219299316406, 52.87884521484375, 35.46954345703125, 103.11831665039062, 137.7729949951172, 17.528228759765625, 167.22006225585938, 37.10368347167969, 82.96463012695312, 81.07305908203125, 72.22540283203125, 52.02833557128906, -28.392974853515625, 132.73818969726562, -1.0464191436767578, 217.3004150390625, 113.19154357910156, 101.57121276855469, 91.93450927734375, 163.785888671875, 128.3677215576172, 132.65066528320312, 30.457473754882812, 36.86738204956055, 111.96578979492188, -13.13604736328125, -156.39755249023438, 47.55181884765625, 21.42230224609375, 159.77120971679688, 125.74528503417969, -0.11144256591796875, 187.0699462890625, 101.09354400634766, 51.817108154296875, -58.750244140625, 51.86505126953125, -3.165374755859375, 19.761962890625, -0.766021728515625, 246.3282470703125, -10.407455444335938, 120.41144561767578, 68.14958953857422, -63.14231872558594, 158.3302001953125, 139.33770751953125, -118.13449096679688, -6.191436767578125, -10.549636840820312, -21.875686645507812, 100.611328125, -43.41679382324219, -34.505279541015625, -37.101165771484375, 113.99947357177734, 29.14527130126953, 151.00238037109375, 128.21469116210938, 43.603370666503906, 2.116710662841797, 31.218399047851562, 58.856170654296875, -108.8157958984375, 35.720252990722656, 87.69364929199219, -6.16455078125, 89.82852172851562, 103.43797302246094, -208.60311889648438, 6.688396453857422, -34.428497314453125, 192.343017578125, 191.18429565429688, 40.33782196044922, -1.241485595703125, 159.51467895507812, 256.79412841796875, 141.80648803710938, 163.83505249023438, -11.030723571777344, 31.210845947265625, -36.742637634277344, 47.008056640625, 90.28636169433594, 16.74304962158203, -12.024627685546875, 8.2642822265625, 8.605125427246094, 348.642822265625, 57.9903564453125, 29.00457763671875, 188.1468505859375, 5.615074157714844, 30.933273315429688, 78.74515533447266, 10.637397766113281, 107.76663208007812, 11.676397323608398, 9.620414733886719, 83.43099975585938, 6.996551513671875, 180.65103149414062, -28.338790893554688, -69.5173110961914, 31.52099609375, -44.31561279296875, -160.16485595703125, 64.95745849609375, -3.2937278747558594, 14.491928100585938, -16.1343994140625, 0.0, 151.06695556640625, 0.15174293518066406, 218.65582275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000191.npy"}
{"epoch": 0.4, "step": 192, "batch_size": 128, "mean": 42.11943817138672, "std": 80.23103332519531, "min": -198.83416748046875, "p10": -46.52562522888183, "median": 37.2530517578125, "p90": 133.6213836669922, "max": 284.4410400390625, "pos_frac": 0.703125, "sample": [109.43017578125, 107.540771484375, 87.43460083007812, -198.83416748046875, -3.8504180908203125, 129.05789184570312, 41.345977783203125, 13.965478897094727, -16.2823486328125, 86.81988525390625, -187.88827514648438, 182.52735900878906, 0.0, 0.0, 3.9909133911132812, -23.24554443359375, 152.76632690429688, 77.75347900390625, 116.66790771484375, 130.16586303710938, -18.456382751464844, -43.800357818603516, 168.5738525390625, 90.60696411132812, 5.426799774169922, 27.742645263671875, -11.760498046875, -15.84747314453125, 60.50464630126953, 70.87791442871094, 12.399627685546875, -5.9390411376953125, 89.838134765625, 53.52972412109375, -52.88458251953125, 29.960693359375, -13.114349365234375, 130.77757263183594, 36.3641357421875, 121.77752685546875, -80.85919189453125, 272.42742919921875, -43.03660583496094, -114.01773071289062, -129.863525390625, 62.117431640625, -55.2335205078125, -2.8858413696289062, 6.688678741455078, 75.81243896484375, 4.81988525390625, 11.81732177734375, 56.74775695800781, 132.45880126953125, -13.55706787109375, -5.8820037841796875, -88.09568786621094, 77.04132080078125, 46.996002197265625, 49.93072509765625, 83.83499145507812, -1.0943870544433594, -14.9510498046875, 34.78057861328125, 11.971183776855469, 240.5218505859375, 150.60690307617188, 68.41952514648438, 36.1064453125, -76.796630859375, -18.826751708984375, 92.155517578125, 173.01608276367188, 145.9580078125, 19.50494384765625, 73.64631652832031, 20.58599853515625, 64.7679443359375, 28.31121063232422, 49.474639892578125, 29.494659423828125, 54.24261474609375, 38.1419677734375, 128.84259033203125, -25.222816467285156, 15.4188232421875, 38.41326904296875, 24.266265869140625, 125.31377410888672, 67.1805419921875, 4.625457763671875, -11.58453369140625, 17.757831573486328, 202.2406005859375, 47.903831481933594, 4.143524169921875, 68.73497009277344, 34.73101806640625, 136.33407592773438, 6.7115478515625, 115.3468017578125, -0.2644500732421875, 83.2694091796875, -99.1739501953125, -102.4010009765625, 153.1461639404297, -86.59130859375, 65.02629089355469, 118.54332733154297, 11.393819808959961, -18.034210205078125, 105.23846435546875, 52.638427734375, 85.23764038085938, 104.60513305664062, -26.58293914794922, 119.87823486328125, 122.30413818359375, 61.54888916015625, -28.516448974609375, -54.95350646972656, 284.4410400390625, 88.74140930175781, 12.168617248535156, 43.675079345703125, 141.65597534179688, -21.085205078125, 60.979034423828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000192.npy"}
{"epoch": 0.40209424083769635, "step": 193, "batch_size": 128, "mean": 51.30720138549805, "std": 90.57910919189453, "min": -202.90432739257812, "p10": -56.125360107421876, "median": 41.94270324707031, "p90": 167.01961364746091, "max": 357.74761962890625, "pos_frac": 0.765625, "sample": [68.27027893066406, 32.43980407714844, 3.155242919921875, -20.587127685546875, 37.34051513671875, 64.3944091796875, 92.10823822021484, 41.192352294921875, -53.4490966796875, 27.11579132080078, 7.306510925292969, 7.585348129272461, 198.05709838867188, 162.24761962890625, 85.30412292480469, -19.3680419921875, 156.04861450195312, 79.12094116210938, 119.77824401855469, 57.518829345703125, 117.45201110839844, 29.335147857666016, -35.26966857910156, 197.65716552734375, 189.0941162109375, 163.57586669921875, 248.78631591796875, -152.1959991455078, -129.2509765625, 105.05496215820312, 14.748046875, 11.586034774780273, 24.313919067382812, 94.85397338867188, 36.42875671386719, 186.67852783203125, 83.7232666015625, -126.04165649414062, -68.03368377685547, 40.93757629394531, 80.32943725585938, -54.790618896484375, 54.841888427734375, -8.03955078125, 108.56941223144531, 5.235267639160156, 96.57342529296875, 25.719284057617188, 40.14471435546875, -18.74212646484375, -202.90432739257812, -82.03955078125, 57.142578125, 116.04969024658203, 2.7284317016601562, -65.63978576660156, 173.25076293945312, 164.34912109375, 38.680423736572266, 29.66962432861328, 249.38546752929688, -157.36181640625, 52.88165283203125, -56.196197509765625, 75.4579086303711, 48.85369873046875, 157.82659912109375, 9.604036331176758, -133.17041015625, 24.499908447265625, -10.811225891113281, 124.33407592773438, 63.09803009033203, 47.104217529296875, -74.74630737304688, -67.6573486328125, 104.02788543701172, 83.6514892578125, -3.86944580078125, 64.75537109375, 90.01907348632812, 237.37939453125, -36.55823516845703, 91.4144287109375, -6.29840087890625, -10.983245849609375, 270.535888671875, -56.095001220703125, 22.901138305664062, 87.80438232421875, 16.90802001953125, 6.727813720703125, 3.68499755859375, 98.282958984375, 0.79364013671875, 93.24382019042969, 72.86050415039062, 54.98054504394531, 38.01310729980469, 129.31036376953125, 29.94842529296875, 67.01361083984375, 101.81353759765625, 47.265289306640625, 145.57147216796875, -1.779266357421875, -1.890045166015625, 7.623199462890625, 8.7410888671875, 18.34363555908203, 119.16837310791016, 45.595306396484375, 225.20465087890625, 213.77818298339844, 31.07794189453125, 357.74761962890625, 3.845489501953125, -24.9833984375, -30.696861267089844, -56.436981201171875, 113.99139404296875, 184.2588348388672, 83.59766387939453, 48.110931396484375, 123.30947875976562, 42.69305419921875, 34.2109375, 83.46417236328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000193.npy"}
{"epoch": 0.4041884816753927, "step": 194, "batch_size": 128, "mean": 41.192596435546875, "std": 85.5194320678711, "min": -216.85023498535156, "p10": -49.873645019531246, "median": 29.483867645263672, "p90": 150.68397216796873, "max": 413.559326171875, "pos_frac": 0.6953125, "sample": [87.85598754882812, 8.819747924804688, 81.19871520996094, -56.06505584716797, 14.701469421386719, 22.072967529296875, 126.62605285644531, 8.3094482421875, -62.34556579589844, 47.70171356201172, 81.05902099609375, 0.7775421142578125, 203.1637420654297, -48.240234375, 21.20935821533203, 66.400634765625, -89.9371337890625, -45.658355712890625, 97.80938720703125, -173.50360107421875, 0.5054512023925781, -216.85023498535156, 198.7093505859375, 51.395965576171875, -46.354888916015625, 90.789306640625, 127.95782470703125, 124.08079528808594, 177.5442352294922, -18.577377319335938, 132.790771484375, 158.29696655273438, 95.94427490234375, 213.07785034179688, 2.8760986328125, -73.47050476074219, 221.72552490234375, 106.18242645263672, 75.38592529296875, 168.5786895751953, 60.19921875, 0.5868759155273438, 52.187530517578125, -7.100250244140625, 36.606712341308594, 34.911895751953125, 413.559326171875, 18.952072143554688, 110.0665283203125, 82.510009765625, 180.8916015625, 29.67193603515625, -33.575714111328125, 93.7413330078125, 109.71371459960938, 28.090003967285156, 14.900226593017578, 20.69000244140625, 37.43524169921875, 15.992256164550781, -20.853042602539062, 16.060115814208984, -44.636810302734375, 54.47528076171875, 55.081695556640625, -14.767536163330078, 91.1366958618164, -86.98226928710938, -62.17304992675781, 147.7293701171875, -74.65121459960938, 61.752960205078125, 21.433013916015625, 30.97911834716797, -60.552093505859375, 70.52838134765625, 51.06111145019531, 77.32086181640625, 43.799522399902344, 59.15234375, -25.670684814453125, 152.00967407226562, -53.6849365234375, 123.20294189453125, 25.28387451171875, -2.888477325439453, -28.669708251953125, 84.32598876953125, 208.67300415039062, 87.248291015625, 62.68170166015625, -147.78384399414062, -33.233642578125, 2.057098388671875, 98.64755249023438, 16.693069458007812, -10.662069320678711, 65.55709838867188, 55.52935791015625, 12.733331680297852, -43.122283935546875, 135.10214233398438, 29.295799255371094, 166.13290405273438, -15.433059692382812, -72.24545288085938, -15.400543212890625, 25.42584228515625, 72.38433837890625, -0.27227783203125, -1.7463607788085938, 75.65653991699219, -5.835458755493164, -41.401268005371094, 69.06134033203125, 209.92710876464844, -28.433074951171875, 10.94439697265625, 2.26861572265625, 150.11581420898438, 43.870269775390625, -7.631248474121094, -6.694095611572266, -13.0966796875, 8.708904266357422, 31.43927001953125, -22.851654052734375, 125.9637451171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000194.npy"}
{"epoch": 0.406282722513089, "step": 195, "batch_size": 128, "mean": 40.836669921875, "std": 84.97327423095703, "min": -158.58384704589844, "p10": -51.015066528320304, "median": 26.695388793945312, "p90": 150.4583557128906, "max": 380.208740234375, "pos_frac": 0.6640625, "sample": [-25.75994873046875, -2.4123001098632812, -1.4498748779296875, 79.105224609375, -14.01983642578125, -40.04338073730469, 6.904510498046875, -2.0549087524414062, 12.98193359375, -9.37451171875, 79.97232055664062, 51.13226318359375, -91.05047607421875, 40.614013671875, 40.8787841796875, 43.567047119140625, -42.26952362060547, -107.9056396484375, -66.73912048339844, -10.55413818359375, 244.24346923828125, 18.125656127929688, 99.28392028808594, 173.16400146484375, 86.61141967773438, 71.79037475585938, 162.51593017578125, 93.53466796875, -82.634033203125, 13.063201904296875, -32.14049530029297, 43.5572509765625, 198.5584716796875, 61.10894775390625, -54.634552001953125, -2.6786041259765625, 221.98748779296875, 102.2530517578125, 21.10906982421875, -158.58384704589844, -36.269561767578125, 82.4537353515625, 64.80596923828125, -21.940032958984375, 105.26382446289062, 129.37582397460938, -64.30471801757812, 14.63653564453125, -57.471824645996094, 50.1619873046875, 50.50244140625, 55.514892578125, 25.69493865966797, 96.37934875488281, -23.971229553222656, 113.17323303222656, -59.59562683105469, -30.31151580810547, 23.32929229736328, -54.50543212890625, 158.22027587890625, -111.73052978515625, 62.01860046386719, 63.77379608154297, 32.0633544921875, 26.93902587890625, 75.14752197265625, 28.198394775390625, 46.9736328125, -31.248207092285156, -0.46137237548828125, 15.298553466796875, -30.92510986328125, -26.805572509765625, 18.468833923339844, 0.758544921875, 22.719818115234375, 2.6256942749023438, 91.48197174072266, 5.436309814453125, 120.41641235351562, 20.297210693359375, 88.140380859375, 116.997802734375, 14.356536865234375, 25.595657348632812, -25.0906982421875, 148.134033203125, 82.99002838134766, 35.14051818847656, 109.03021240234375, 240.32913208007812, 177.68023681640625, -8.405723571777344, 26.451751708984375, 6.7200164794921875, 7.522228240966797, -11.499410629272461, -46.60382080078125, 204.00259399414062, 231.15585327148438, 89.83097839355469, 110.11456298828125, -130.16201782226562, -49.519195556640625, 8.605712890625, -44.9757080078125, 140.65982055664062, 105.6258544921875, -136.0286865234375, -14.268798828125, 116.78714752197266, -18.80853271484375, 104.51617431640625, 68.488037109375, 47.76512145996094, 155.88177490234375, 127.58720397949219, 380.208740234375, -47.99609375, 80.63201904296875, 42.177947998046875, 105.49109649658203, -7.922088623046875, 91.4801254272461, 31.228439331054688, 190.2647705078125, -17.565139770507812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000195.npy"}
{"epoch": 0.4083769633507853, "step": 196, "batch_size": 128, "mean": 52.306053161621094, "std": 88.6597671508789, "min": -276.5655822753906, "p10": -26.45525131225586, "median": 37.028228759765625, "p90": 171.3014953613281, "max": 302.1692810058594, "pos_frac": 0.7109375, "sample": [-9.736701965332031, 32.94854736328125, 88.40460205078125, 36.905303955078125, 16.899993896484375, 45.4384765625, 63.7554931640625, 7.50164794921875, 223.31158447265625, 14.347991943359375, 179.0724639892578, 64.13125610351562, -69.20849609375, 158.20318603515625, -105.15682983398438, -8.049869537353516, -9.46441650390625, 161.46649169921875, -10.654748916625977, 165.60501098632812, -17.041786193847656, 4.39752197265625, 202.367431640625, 135.25711059570312, 62.729896545410156, -276.5655822753906, 2.536407470703125, 10.523872375488281, 164.8578338623047, 143.80068969726562, -66.96942138671875, -28.85822296142578, 52.50895690917969, 52.4193115234375, -16.31878662109375, 31.928007125854492, -11.948246002197266, -100.5411376953125, 82.95426940917969, 104.22108459472656, 27.203292846679688, 7.848609924316406, 107.89761352539062, 145.75376892089844, -17.73687744140625, 16.535934448242188, 22.58263397216797, 173.80874633789062, -17.586822509765625, -58.57032775878906, 1.6461029052734375, 226.64886474609375, 69.99665832519531, 80.72808837890625, 20.16229248046875, 240.36386108398438, -26.38690185546875, 161.35919189453125, 96.9259033203125, 216.2942657470703, -22.74352264404297, 79.5755615234375, 17.48009490966797, 136.77459716796875, 112.62875366210938, -19.990951538085938, -1.2010345458984375, 208.2084197998047, 13.741485595703125, 175.63912963867188, 106.05313110351562, -52.64521789550781, 10.645004272460938, 36.582000732421875, -16.507286071777344, -5.528226852416992, 48.423492431640625, 4.723899841308594, 63.357421875, 73.85751342773438, -3.892162322998047, 35.358802795410156, 63.589263916015625, 141.30096435546875, 264.28704833984375, -7.111225128173828, -32.699771881103516, 87.14517974853516, 92.8900146484375, 38.441070556640625, 214.94522094726562, -158.15252685546875, 58.44183349609375, 2.549875259399414, -140.34893798828125, 117.45828247070312, 50.125244140625, 58.622802734375, 170.22695922851562, -10.90814208984375, 97.1417236328125, 6.420169830322266, 37.151153564453125, 155.61376953125, -31.69512176513672, 69.1058349609375, 146.39199829101562, 27.583938598632812, 67.74737548828125, -26.441497802734375, 30.745628356933594, -1.5284423828125, -22.5908203125, -5.663116455078125, 68.63230895996094, 38.51116943359375, 95.911865234375, 15.386505126953125, 76.20143127441406, 302.1692810058594, 159.62228393554688, 209.78256225585938, -4.8497314453125, -26.487342834472656, 45.20642852783203, -4.411865234375, 69.3385009765625, 15.379339218139648], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000196.npy"}
{"epoch": 0.41047120418848165, "step": 197, "batch_size": 128, "mean": 59.77152633666992, "std": 87.37684631347656, "min": -169.24029541015625, "p10": -54.08446502685547, "median": 49.28257751464844, "p90": 178.4270065307617, "max": 335.32415771484375, "pos_frac": 0.796875, "sample": [-72.64750671386719, 31.694747924804688, -73.87600708007812, 148.51229858398438, -53.97471618652344, 120.97649383544922, 104.959716796875, 145.19134521484375, 174.91229248046875, 158.99215698242188, -64.14869689941406, 198.916259765625, 0.37518310546875, 53.94877624511719, 53.105743408203125, 14.402595520019531, -14.87744140625, -82.92240142822266, 109.51702880859375, 85.26150512695312, -29.582763671875, -45.634979248046875, -0.43199920654296875, -142.2861328125, -148.1090087890625, 7.329132080078125, 119.14248657226562, 125.48336791992188, 27.66436767578125, 20.418777465820312, 97.74435424804688, 25.784423828125, 184.64251708984375, -23.129501342773438, 135.31723022460938, 45.96144104003906, 28.38296127319336, -21.671546936035156, 28.113685607910156, 206.62554931640625, 6.468774795532227, 60.74650573730469, 35.078758239746094, 66.43798828125, 82.7576904296875, -30.81915283203125, 58.16691589355469, 14.574264526367188, 16.108203887939453, 35.06793212890625, 104.98310852050781, 74.54510498046875, 28.796417236328125, 72.754150390625, -21.642425537109375, 112.92929077148438, 8.8126220703125, 133.66775512695312, 26.289215087890625, 22.222396850585938, 141.40936279296875, -4.219966888427734, 182.07591247558594, 32.650665283203125, 0.303253173828125, 162.0745849609375, 176.86318969726562, 165.78192138671875, 56.71376037597656, 335.32415771484375, -38.68011474609375, 193.12734985351562, 65.87957763671875, -66.63204956054688, 223.3984375, 200.5882568359375, 170.70559692382812, 29.31707763671875, 48.0511474609375, 176.33901977539062, -6.803184509277344, 183.01231384277344, 9.59130859375, 79.15460205078125, 210.64654541015625, 17.098495483398438, 89.30795288085938, 94.99732971191406, 120.9520263671875, 14.256698608398438, 50.514007568359375, 13.878753662109375, 0.0, 212.44363403320312, 130.49591064453125, 75.37774658203125, 3.5442047119140625, 54.06829833984375, 30.50273895263672, 2.0010509490966797, -169.24029541015625, -66.48199462890625, 100.73080444335938, 16.670181274414062, 131.3475341796875, 140.7218017578125, 135.3638153076172, -56.342742919921875, 66.66555786132812, 211.50531005859375, 13.269729614257812, 31.903076171875, -54.340545654296875, 9.879425048828125, -74.47185516357422, 7.1571044921875, 163.1300048828125, 35.82830810546875, 185.09506225585938, 70.14633178710938, 109.78369140625, 46.719482421875, 141.0093994140625, 45.662322998046875, 141.13677978515625, -111.23974609375, 82.11660766601562, 72.88944244384766], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000197.npy"}
{"epoch": 0.41256544502617803, "step": 198, "batch_size": 128, "mean": 60.743309020996094, "std": 87.41522979736328, "min": -128.15988159179688, "p10": -33.78808441162109, "median": 45.546165466308594, "p90": 176.36790618896484, "max": 272.9183349609375, "pos_frac": 0.7265625, "sample": [158.038818359375, 24.28936767578125, 1.578125, -72.53300476074219, 168.1795654296875, 165.47586059570312, 19.329345703125, 55.29343032836914, 134.28280639648438, 154.566162109375, 40.86993408203125, 115.20361328125, 4.491811752319336, 14.521766662597656, 64.78755187988281, -52.62705993652344, 119.39877319335938, 71.14096069335938, 5.168664932250977, 164.81564331054688, -6.936357498168945, 70.8165283203125, 193.0772705078125, 88.77301025390625, 44.5230712890625, 266.4522705078125, 111.7442626953125, 75.23214721679688, -20.08561134338379, 6.22784423828125, -14.129791259765625, -36.619659423828125, 136.23641967773438, 52.527679443359375, 179.5465087890625, 235.14306640625, -23.927642822265625, 134.18817138671875, -115.52252197265625, 227.9093017578125, 170.15530395507812, -119.25213623046875, -61.64007568359375, 147.50531005859375, 31.061561584472656, 13.4222412109375, 65.85939025878906, 102.35212707519531, 31.679534912109375, 80.18289184570312, 93.3713607788086, -118.66940307617188, 220.4886474609375, -10.503459930419922, 78.64886474609375, 138.3687744140625, 71.85868072509766, 23.716480255126953, 139.14614868164062, 222.60336303710938, 175.0621337890625, 39.933746337890625, 184.70529174804688, 21.10552978515625, 156.85247802734375, 32.67828369140625, -14.708206176757812, -28.485637664794922, 58.024932861328125, -6.60833740234375, 251.3897705078125, 176.99635314941406, 227.73077392578125, 161.27264404296875, 5.40032958984375, 5.133148193359375, 272.9183349609375, -23.191558837890625, 39.14947509765625, 85.4185791015625, -24.12176513671875, 108.46466064453125, -128.15988159179688, -18.063385009765625, 151.77391052246094, 108.70938110351562, 18.197662353515625, -1.8599128723144531, -71.62045288085938, 63.79840087890625, 0.0, 92.4183349609375, 140.24966430664062, 3.81976318359375, 63.58026123046875, -34.80207824707031, 23.630889892578125, 176.09857177734375, 30.743724822998047, -20.453628540039062, -27.856624603271484, -22.44136619567871, 195.80499267578125, -24.438674926757812, 37.6795654296875, -33.353515625, 88.1475830078125, 55.7855224609375, 88.94417572021484, 92.34335327148438, -7.249086380004883, 103.4456787109375, 46.56925964355469, 122.77938842773438, -40.375205993652344, 38.03057861328125, -4.49627685546875, -94.5753173828125, 143.48159790039062, 9.608642578125, 28.286026000976562, -40.50885009765625, -24.884231567382812, 16.910552978515625, 36.416961669921875, 113.32394409179688, 93.86981201171875, -1.0904541015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000198.npy"}
{"epoch": 0.41465968586387436, "step": 199, "batch_size": 128, "mean": 52.04099655151367, "std": 84.8222427368164, "min": -168.34451293945312, "p10": -54.472924804687494, "median": 44.69851493835449, "p90": 165.230776977539, "max": 252.99237060546875, "pos_frac": 0.75, "sample": [33.95381164550781, 84.73577880859375, 145.98922729492188, 27.72186279296875, 137.25106811523438, 13.6103515625, 19.574142456054688, 197.7332000732422, 63.499176025390625, 53.01091003417969, 43.786373138427734, 4.826871871948242, -2.96160888671875, -91.59613037109375, 43.494384765625, 220.73046875, -20.604232788085938, 24.070762634277344, 46.611427307128906, -67.18679809570312, -8.705692291259766, 30.88934326171875, 22.561508178710938, 123.51113891601562, 70.50222778320312, 19.08709716796875, 202.65960693359375, -21.080856323242188, 109.628173828125, -61.28330993652344, 115.2108383178711, 45.62689208984375, 152.41668701171875, 143.66685485839844, 90.67874145507812, -3.533182144165039, -13.281181335449219, -32.12607192993164, 79.24317169189453, 24.12017822265625, 61.83747100830078, -34.89126968383789, 10.322687149047852, 84.47481536865234, 60.398712158203125, 69.0184326171875, -79.18722534179688, 53.95591735839844, -11.837371826171875, 68.80426025390625, 215.88711547851562, -105.13633728027344, 146.42153930664062, -47.57521057128906, 125.97222900390625, 5.85400390625, 147.8509521484375, 112.471923828125, 38.8388671875, 25.3974609375, 196.69041442871094, 75.73805236816406, -53.55867004394531, 125.06121826171875, 116.58540344238281, 96.81280517578125, 248.05978393554688, 192.3617401123047, 34.281707763671875, 65.99710083007812, 26.46722412109375, 12.83441162109375, -56.60618591308594, 87.0755386352539, -19.346595764160156, 24.985626220703125, 26.95587158203125, 33.34154510498047, 142.7568359375, 74.70506286621094, 89.38855743408203, 229.9080810546875, 35.83135986328125, -35.706268310546875, 93.491455078125, -141.44696044921875, -28.31964874267578, -0.6919078826904297, 223.10638427734375, 45.61065673828125, -114.39926147460938, -49.968994140625, 32.692901611328125, -65.33038330078125, 25.822372436523438, -47.490562438964844, -18.7999267578125, 90.99618530273438, 193.6453857421875, -83.23974609375, 5.623565673828125, 157.15960693359375, 62.5462646484375, 27.31039810180664, 99.09259033203125, -112.62042236328125, 110.312255859375, 121.35712432861328, -168.34451293945312, 96.27407836914062, 108.62109375, 74.07975769042969, 22.099227905273438, 86.86624145507812, 5.465389251708984, 24.367416381835938, -3.2890167236328125, 85.37374877929688, 66.05732727050781, 49.076507568359375, 100.1395492553711, 184.06350708007812, 36.11372375488281, 30.807281494140625, 252.99237060546875, 214.6767578125, 76.33415222167969, -94.52938842773438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000199.npy"}
{"epoch": 0.4167539267015707, "step": 200, "batch_size": 128, "mean": 43.804443359375, "std": 96.21707153320312, "min": -224.99166870117188, "p10": -70.67684326171874, "median": 39.23006820678711, "p90": 178.42856445312498, "max": 285.5225830078125, "pos_frac": 0.671875, "sample": [64.453857421875, -39.52403259277344, 207.45584106445312, 67.04632568359375, 66.29141235351562, 135.53216552734375, 46.24632263183594, -15.236572265625, 95.71925354003906, -37.54877471923828, 62.770965576171875, 193.3143310546875, 234.5625, -50.29352569580078, -0.025054931640625, 51.25555419921875, 171.31536865234375, 109.9797134399414, -59.3756103515625, 194.538330078125, 48.85352325439453, 192.8211669921875, -88.87249755859375, -17.77703857421875, -116.572265625, 210.3994140625, 285.5225830078125, 19.040740966796875, 54.8992919921875, 241.30630493164062, 56.83659362792969, 110.43777465820312, -78.34881591796875, 48.53308868408203, -9.68130874633789, -5.733673095703125, -65.88523864746094, 216.887451171875, 148.6041259765625, -33.50537109375, -109.178466796875, -34.177398681640625, 151.16546630859375, 82.25785827636719, 77.7496337890625, 15.948165893554688, 21.982803344726562, -23.605514526367188, -146.5419921875, 55.8343505859375, 214.3321533203125, 119.03802490234375, 10.462554931640625, 94.38723754882812, 82.30401611328125, 12.0223388671875, 26.09777069091797, 136.71978759765625, 0.473846435546875, 164.53781127929688, -10.971229553222656, 7.037139892578125, 80.55319213867188, 35.96985626220703, -28.78411865234375, 26.49860954284668, -67.38885498046875, 189.64767456054688, 0.0, 84.30551147460938, 183.99395751953125, 17.576934814453125, 152.94873046875, 109.36309814453125, -55.291542053222656, -13.758804321289062, 153.45486450195312, -182.7763671875, 89.90869140625, -10.482734680175781, 9.09222412109375, 8.220611572265625, 110.92739868164062, 64.01641845703125, -99.05429077148438, 92.4254150390625, 12.02471923828125, -41.06280517578125, -129.50973510742188, 156.95257568359375, -40.4810791015625, 195.26348876953125, 56.03318786621094, 18.608619689941406, 147.46890258789062, -12.253158569335938, 1.7725830078125, -33.00360107421875, -95.69467163085938, 155.37905883789062, -62.251220703125, -84.9246826171875, 115.064208984375, 50.72454833984375, 48.715240478515625, 9.0299072265625, -118.94015502929688, -224.99166870117188, 85.64686584472656, 85.28070068359375, -6.650356292724609, 97.369140625, -39.902099609375, -21.43951416015625, 3.9893798828125, 176.04339599609375, -122.5196533203125, 11.2421875, 17.758201599121094, 152.05227661132812, -50.31549072265625, 143.82049560546875, 65.38262939453125, 104.00509643554688, 42.49028015136719, 15.733673095703125, 98.95095825195312, 8.619293212890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000200.npy"}
{"epoch": 0.418848167539267, "step": 201, "batch_size": 128, "mean": 47.688560485839844, "std": 91.09215545654297, "min": -236.19100952148438, "p10": -66.97705764770508, "median": 38.24949264526367, "p90": 166.26368103027343, "max": 247.6265869140625, "pos_frac": 0.6796875, "sample": [-95.04690551757812, -84.84469604492188, 115.16339111328125, 120.280517578125, -28.07904815673828, -90.60235595703125, 0.0, -138.236572265625, 166.32778930664062, 209.51708984375, 72.85771179199219, 5.6297454833984375, -2.8740081787109375, 14.160825729370117, -13.823650360107422, 33.73749542236328, 163.8885498046875, 135.87606811523438, 34.21331787109375, 0.0, 11.488433837890625, 139.74111938476562, 66.42688751220703, 209.9107666015625, 96.77139282226562, -17.032760620117188, 4.29810905456543, 1.9669189453125, -31.27960205078125, -39.5401611328125, 103.69429016113281, 157.17645263671875, 73.64920043945312, 59.23187255859375, -37.4744873046875, -5.83599853515625, -17.563751220703125, -12.832626342773438, -22.073951721191406, -25.721969604492188, 12.894500732421875, 138.28024291992188, 73.96292877197266, 0.0, 166.2362060546875, 207.344970703125, -10.165172576904297, 68.5558090209961, 29.448410034179688, -126.48300170898438, 98.97998046875, -21.18890380859375, 213.17852783203125, 108.01052856445312, -108.59396362304688, 114.0970458984375, -34.50250244140625, 141.24107360839844, -72.30396270751953, 216.07595825195312, 3.275390625, 21.96331787109375, 5.106559753417969, 140.6395263671875, 51.52783203125, 144.6666259765625, 69.37936401367188, 62.37665557861328, -19.807449340820312, 63.31646728515625, 135.91641235351562, 82.72490692138672, 21.571014404296875, -30.004547119140625, 75.93539428710938, 234.09359741210938, 160.95458984375, 185.97366333007812, 95.295654296875, 88.71659851074219, -20.951202392578125, 191.20233154296875, 107.95352172851562, 65.20692443847656, 37.884605407714844, -26.010345458984375, -98.21017456054688, 114.174072265625, -104.79879760742188, 223.41455078125, 247.6265869140625, -4.2017822265625, 38.6143798828125, -138.560302734375, 51.58161926269531, 124.8902587890625, -23.536941528320312, 115.72225952148438, -67.96951293945312, -81.593505859375, -32.088836669921875, 122.79025268554688, 58.39556884765625, 28.079193115234375, 19.369354248046875, 72.25784301757812, 244.974365234375, 110.52188110351562, 1.46612548828125, 181.39736938476562, 88.68331909179688, 20.556304931640625, 13.289253234863281, 9.796821594238281, -17.78887939453125, 68.704345703125, 133.45986938476562, 50.603515625, 65.54216003417969, 10.56982421875, 133.15444946289062, 19.720550537109375, 51.28387451171875, 3.67547607421875, -66.55171966552734, 92.24955749511719, -38.05921173095703, -236.19100952148438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000201.npy"}
{"epoch": 0.42094240837696334, "step": 202, "batch_size": 128, "mean": 48.06559753417969, "std": 93.0828857421875, "min": -174.96417236328125, "p10": -63.51143341064453, "median": 44.76729393005371, "p90": 162.19539794921872, "max": 290.8097229003906, "pos_frac": 0.6796875, "sample": [283.5213623046875, -168.2913360595703, -62.90882873535156, 51.340576171875, -18.83453369140625, 191.42807006835938, 22.822052001953125, 159.900390625, -82.189453125, 153.58607482910156, 49.54747009277344, 107.81562805175781, 145.3035888671875, -85.78663635253906, -27.5562744140625, 99.06230926513672, -45.42303466796875, -20.90887451171875, 131.44009399414062, 5.47802734375, 87.35539245605469, -96.62826538085938, -23.5111083984375, -23.0941162109375, 0.0, 155.109375, 42.380252838134766, 86.33779907226562, -11.814788818359375, 25.376617431640625, 104.54510498046875, -67.84872436523438, 185.78836059570312, 79.69344329833984, -8.990203857421875, 219.3959503173828, 124.68701171875, 155.73809814453125, 61.70421600341797, -50.82072448730469, 61.431983947753906, 132.7593994140625, -41.261688232421875, 59.624267578125, 146.26473999023438, -15.48371696472168, -123.18988037109375, 28.408493041992188, -65.74917602539062, 40.631683349609375, 30.36902618408203, 120.96771240234375, -174.96417236328125, 126.94728088378906, 63.25984191894531, 241.63351440429688, -53.024749755859375, -83.90540313720703, 12.61117172241211, -11.017257690429688, 52.6990966796875, 113.00192260742188, -170.51028442382812, 4.0877685546875, -64.91751098632812, -15.9852294921875, 25.8599853515625, 106.39462280273438, 89.5968017578125, 261.9778137207031, 119.24358367919922, 158.03836059570312, -13.609329223632812, 19.88824462890625, 137.53961181640625, 7.150165557861328, 168.82974243164062, -58.462127685546875, -35.048980712890625, 3.9853515625, 77.4422607421875, 17.966854095458984, 126.05793762207031, 66.64215087890625, 65.32269287109375, -43.89923095703125, 94.73966979980469, 63.2886962890625, -37.30632019042969, 22.307403564453125, 20.63580322265625, 271.4513244628906, -6.74885368347168, 228.18064880371094, 64.67732238769531, 15.539390563964844, 55.422691345214844, 186.1661376953125, 137.37484741210938, 15.776447296142578, -12.48748779296875, 89.61886596679688, -3.78387451171875, 59.32960510253906, 13.364883422851562, 34.220184326171875, -123.59891510009766, 61.7132568359375, 83.83308410644531, 64.67306518554688, 167.5504150390625, 16.425369262695312, 145.17666625976562, 49.051544189453125, 290.8097229003906, -29.126220703125, -4.419441223144531, -34.01350402832031, 80.19781494140625, -6.852020263671875, 6.845428466796875, 49.606407165527344, 72.26693725585938, 41.149818420410156, 47.154335021972656, 91.10842895507812, -71.38704681396484, 192.10812377929688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000202.npy"}
{"epoch": 0.42303664921465967, "step": 203, "batch_size": 128, "mean": 36.23287582397461, "std": 80.16722869873047, "min": -161.69210815429688, "p10": -77.6338394165039, "median": 36.92022705078125, "p90": 127.4942687988281, "max": 286.556884765625, "pos_frac": 0.6875, "sample": [28.25599479675293, 4.446556091308594, 106.7889404296875, 60.2740478515625, 79.03741455078125, 89.42523193359375, 10.712814331054688, -24.72616958618164, -82.45953369140625, -44.345458984375, -28.936363220214844, 69.81930541992188, -40.04920959472656, 75.81734466552734, 5.113304138183594, 44.81581115722656, -36.925148010253906, 119.08316802978516, -92.70095825195312, 123.6121826171875, 145.2491455078125, 21.37310028076172, 139.78111267089844, 104.14002990722656, 67.9781494140625, 7.5165863037109375, -133.72918701171875, 176.82994079589844, -54.55088806152344, 23.307525634765625, 16.416109085083008, -2.492584228515625, 83.42730712890625, 123.6324462890625, 108.46821594238281, 73.15617370605469, 36.1685791015625, 286.556884765625, 42.629852294921875, -4.8199462890625, 18.1834716796875, 0.0, -87.85348510742188, 87.03848266601562, 65.05096435546875, 34.3109130859375, 210.72129821777344, -68.4090576171875, 26.373069763183594, 48.670623779296875, 51.038177490234375, -57.457550048828125, 73.93658447265625, -73.0478515625, -79.056396484375, 28.0093994140625, 122.51637268066406, -77.31257629394531, 77.60699462890625, 30.96148681640625, 41.354400634765625, 92.31454467773438, 16.84929656982422, 49.24462890625, -20.016632080078125, 56.487213134765625, 88.34848022460938, 36.6248779296875, 77.42726135253906, 157.39889526367188, 116.1563720703125, 44.720848083496094, -101.31680297851562, 30.29743194580078, 62.804656982421875, 72.05938720703125, 62.40313720703125, 4.012144088745117, 162.19509887695312, 195.68185424804688, -104.91329956054688, -21.78125, -36.362274169921875, -4.815065383911133, 75.68280792236328, -90.96783447265625, -24.25079345703125, -61.88892364501953, 212.31640625, 88.14239501953125, 32.23419952392578, 94.009765625, 103.48431396484375, -135.014892578125, 97.61679077148438, 116.35311889648438, -2.391366958618164, 90.9947509765625, -78.38345336914062, 119.004150390625, 7.188777923583984, -49.609832763671875, 46.684226989746094, 151.16275024414062, 37.215576171875, 3.9414939880371094, 125.27023315429688, 138.70257568359375, -19.437530517578125, 105.19890594482422, 77.12542724609375, 43.627071380615234, -137.29830932617188, 21.131195068359375, -97.98477172851562, 12.778663635253906, -12.490997314453125, 171.91195678710938, 0.0, -2.74945068359375, -37.814422607421875, 132.68368530273438, 34.80130386352539, 47.19677734375, 68.01382446289062, -161.69210815429688, -21.720287322998047, 80.47601318359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000203.npy"}
{"epoch": 0.42513089005235605, "step": 204, "batch_size": 128, "mean": 56.703773498535156, "std": 82.10517120361328, "min": -121.78717041015625, "p10": -56.97146911621093, "median": 50.723388671875, "p90": 162.12680358886718, "max": 273.5764465332031, "pos_frac": 0.765625, "sample": [155.47482299804688, 77.51611328125, 42.823883056640625, 75.40338134765625, 11.187545776367188, 37.62480163574219, 96.1593017578125, -110.36724853515625, 34.07505798339844, -47.2767333984375, 51.24571990966797, 32.082305908203125, 143.62954711914062, 196.62564086914062, 106.8051528930664, 7.54400634765625, 82.48527526855469, 181.57757568359375, 147.20928955078125, 120.15838623046875, 50.74296569824219, -39.82568359375, 48.983917236328125, 134.36614990234375, 21.72540283203125, -63.134063720703125, -96.44107818603516, 153.09689331054688, 62.06365966796875, 144.6202392578125, 39.74664306640625, 67.51422119140625, 41.691131591796875, 151.84622192382812, 6.094814300537109, 122.34198760986328, 12.634891510009766, 273.5764465332031, 50.70381164550781, 113.217041015625, 102.970458984375, 26.277694702148438, 44.4552001953125, 58.32219696044922, -85.7781982421875, 143.09909057617188, 68.06228637695312, -12.531044006347656, -99.9981689453125, 27.533599853515625, -17.72475814819336, 115.07693481445312, 39.02449035644531, 10.374160766601562, 129.09219360351562, -1.84381103515625, 10.884552001953125, -88.4248046875, 117.8443603515625, 34.04350280761719, 58.347869873046875, -11.258049011230469, 82.55303955078125, 61.697235107421875, 71.355224609375, 178.37930297851562, 45.351318359375, 59.30340576171875, 177.02606201171875, -55.088165283203125, 174.79745483398438, 22.572731018066406, 49.344482421875, 115.6937255859375, 7.7832183837890625, -61.3658447265625, 162.17095947265625, -5.268363952636719, -20.758811950683594, -76.84945678710938, -0.9692840576171875, -46.346656799316406, -6.361234664916992, 2.287384033203125, 117.16106414794922, 132.45477294921875, 62.612762451171875, 86.42943572998047, 165.2105712890625, 159.0011749267578, 95.15814208984375, 5.1245574951171875, 16.6060791015625, 20.557968139648438, 268.72894287109375, 96.94573974609375, 74.782470703125, -51.07525634765625, -90.66571044921875, -30.87591552734375, 109.85174560546875, 147.9906005859375, -97.38114929199219, 167.94818115234375, 28.317047119140625, 94.59622192382812, 46.54833984375, 222.5111083984375, 162.10787963867188, 98.706787109375, 102.04196166992188, -21.41998291015625, 157.8470458984375, 150.50332641601562, -2.578580856323242, 21.22711181640625, 167.43243408203125, 79.58826446533203, 18.727218627929688, 57.62150573730469, -121.78717041015625, 128.799072265625, 0.9977626800537109, 36.6180419921875, -63.98663330078125, 181.13784790039062, -76.7083740234375, -10.03802490234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000204.npy"}
{"epoch": 0.4272251308900524, "step": 205, "batch_size": 128, "mean": 63.869895935058594, "std": 91.5755844116211, "min": -137.72598266601562, "p10": -36.12364807128906, "median": 48.32063293457031, "p90": 178.73776245117188, "max": 411.7734375, "pos_frac": 0.7734375, "sample": [165.58004760742188, 0.0, 31.720565795898438, 12.54627799987793, 13.821624755859375, -14.181884765625, -51.4468994140625, 20.985301971435547, 152.42474365234375, 127.77261352539062, 101.19116973876953, 103.75830078125, -30.24048614501953, 10.94012451171875, 0.2558250427246094, 25.099258422851562, 47.455413818359375, 12.1962890625, 225.6009521484375, 8.318389892578125, 191.92898559570312, 91.27198791503906, 84.36371612548828, -99.55538940429688, 17.475601196289062, 299.3843994140625, 86.84884643554688, 141.81399536132812, -96.19630432128906, 112.9463119506836, 139.71713256835938, 33.616912841796875, -91.11328125, 128.46652221679688, -77.89419555664062, -20.56488037109375, 56.44403076171875, 78.0789794921875, -30.97979736328125, 33.13030242919922, 13.150177001953125, 73.75961303710938, -75.34014892578125, 137.58041381835938, 95.23361206054688, 149.11727905273438, 1.22943115234375, 31.136489868164062, -58.7808837890625, 212.5545654296875, 108.78778076171875, 136.1905517578125, 411.7734375, 189.55307006835938, -18.886505126953125, -29.67431640625, 140.29376220703125, 17.026905059814453, -26.5498046875, 3.334930419921875, -33.78102111816406, 181.89340209960938, 15.661705017089844, 26.51190185546875, -9.63720703125, 16.723718643188477, 178.6380615234375, 120.7116928100586, 32.4814453125, 44.921142578125, -132.44955444335938, 101.37693786621094, 49.18585205078125, 106.23828125, 85.73542785644531, -25.545936584472656, 207.17491149902344, 144.1457977294922, -36.65020751953125, 79.8955078125, -127.18606567382812, 151.7144775390625, 2.3935089111328125, 127.10438537597656, 191.49072265625, 97.6895751953125, -49.74040222167969, 102.06674194335938, 146.1328887939453, 118.43563079833984, 161.5020751953125, 259.5025939941406, 66.125, 120.33529663085938, -35.897979736328125, 164.90731811523438, 26.712604522705078, 44.014923095703125, 0.934112548828125, 8.41546630859375, 45.897918701171875, 154.84765625, 0.0, 60.81103515625, 178.97039794921875, 96.95806884765625, 37.631103515625, 110.71951293945312, -137.72598266601562, -72.8736572265625, 9.7392578125, 80.06851196289062, 42.36717224121094, 143.97817993164062, 86.29121398925781, -25.586669921875, 218.32763671875, 142.81512451171875, 100.33892822265625, 4.384307861328125, 181.5770263671875, -12.26763916015625, 89.7762451171875, 150.7803497314453, 6.3681488037109375, 156.55633544921875, 0.0, 40.23976135253906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000205.npy"}
{"epoch": 0.4293193717277487, "step": 206, "batch_size": 128, "mean": 49.65898513793945, "std": 92.61605834960938, "min": -204.0252685546875, "p10": -69.12197875976562, "median": 38.06422424316406, "p90": 172.52948150634765, "max": 270.31304931640625, "pos_frac": 0.6953125, "sample": [105.41403198242188, 85.60873413085938, -35.80855178833008, 40.1689453125, -111.98146057128906, 18.197662353515625, 103.67340087890625, -21.097740173339844, 158.5850830078125, 5.864250183105469, -3.618377685546875, 225.8585205078125, 220.74368286132812, 99.43896484375, 122.73529052734375, 119.40293884277344, 158.73318481445312, 40.089508056640625, -9.63037109375, -5.01129150390625, 24.09869384765625, -17.927696228027344, -1.9046630859375, -0.8758430480957031, -68.87762451171875, -128.47879028320312, 191.27732849121094, 154.31265258789062, 183.5452117919922, 22.782318115234375, 1.6218605041503906, 160.73297119140625, 204.03408813476562, -102.16246795654297, -77.8896484375, 45.462646484375, 169.36288452148438, 171.9256134033203, 102.853271484375, 50.23609161376953, -0.6096954345703125, 173.93850708007812, -22.25387954711914, 8.934356689453125, 91.7780990600586, 117.4552001953125, 10.912073135375977, -69.692138671875, -10.800516128540039, 42.22589111328125, 91.10381317138672, 101.37969970703125, 106.50628662109375, 1.19171142578125, 185.23602294921875, 180.28103637695312, 121.44921875, 25.94024658203125, 35.90302276611328, -104.14433288574219, 71.65069580078125, -84.15278625488281, 143.3480224609375, 150.03656005859375, -3.970733642578125, 120.1650390625, 104.74186706542969, 106.58889770507812, 36.0389404296875, 96.55500030517578, 4.657501220703125, 17.67523193359375, 68.13507080078125, 22.40008544921875, -35.0855712890625, 52.8865966796875, 55.217803955078125, 219.3265380859375, -204.0252685546875, -40.184356689453125, 144.5029296875, -129.18182373046875, 209.02301025390625, 33.27946472167969, 43.318145751953125, 153.50521850585938, -96.107666015625, -4.3172607421875, 31.951858520507812, 5.332916259765625, 71.25309753417969, 97.86425018310547, 115.978271484375, 7.028564453125, -43.803375244140625, 134.0631866455078, 215.2734375, -51.12660217285156, -139.59402465820312, 87.67623901367188, 102.17962646484375, 145.37445068359375, 44.1739501953125, 270.31304931640625, 68.99942779541016, -18.0977783203125, 10.591796875, 20.62579345703125, 120.170654296875, -47.06706237792969, -141.86715698242188, 149.58425903320312, -37.31285095214844, 10.976303100585938, 112.0478515625, 14.940322875976562, -50.089996337890625, -11.2308349609375, -13.911590576171875, 211.40707397460938, 103.36053466796875, -6.80426025390625, -29.782135009765625, 108.01821899414062, 31.605133056640625, 16.946502685546875, 5.06201171875, -110.01795959472656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000206.npy"}
{"epoch": 0.431413612565445, "step": 207, "batch_size": 128, "mean": 44.436073303222656, "std": 99.25227355957031, "min": -387.36676025390625, "p10": -66.31696777343748, "median": 33.456912994384766, "p90": 174.22520141601564, "max": 282.72320556640625, "pos_frac": 0.7265625, "sample": [138.30889892578125, 107.93109130859375, 25.81182861328125, -33.03667449951172, 48.0230712890625, -72.74868774414062, 151.59613037109375, 130.98854064941406, 1.5842361450195312, 98.5462646484375, 157.30374145507812, 5.802490234375, -86.54171752929688, 50.57087707519531, -387.36676025390625, 17.956298828125, 30.066513061523438, -114.5599365234375, 197.39434814453125, -148.36227416992188, 66.37704467773438, -22.560684204101562, 10.502716064453125, -170.172607421875, 18.446876525878906, -90.50405883789062, 70.90188598632812, 83.27877044677734, 12.491035461425781, 118.80056762695312, -25.525360107421875, 16.382598876953125, 17.962722778320312, 91.49375915527344, 88.39218139648438, 176.77267456054688, 53.33306884765625, -1.8737869262695312, 48.45574188232422, 127.5159912109375, 155.54495239257812, -48.027374267578125, -181.42678833007812, -16.090965270996094, -63.560516357421875, 12.90966796875, 221.0748291015625, 0.0, 29.606887817382812, 0.10427093505859375, 42.585693359375, 2.140960693359375, 125.75384521484375, 195.9275665283203, -28.44830322265625, 102.98729705810547, 13.378229141235352, 279.21795654296875, 12.730865478515625, 36.26940155029297, 85.900634765625, 139.90904235839844, 173.88330078125, -89.06854248046875, 110.85920715332031, -9.770782470703125, -43.83704376220703, 100.976318359375, -107.49169921875, 7.254486083984375, 65.22592163085938, -19.64666748046875, 86.28169250488281, 90.37332153320312, -14.711807250976562, -24.051864624023438, 89.09391784667969, -12.08184814453125, 108.86053466796875, 183.72625732421875, 115.72943115234375, 282.72320556640625, 277.470703125, 205.0816650390625, 7.682157516479492, 76.56240844726562, 37.366485595703125, -1.4790496826171875, -47.3638916015625, 18.050743103027344, -62.72149658203125, 24.797378540039062, 34.37528991699219, -135.67752075195312, 133.28042602539062, 174.00082397460938, -107.451171875, -3.2877197265625, 21.29571533203125, 176.43890380859375, -0.60723876953125, -10.36865234375, 8.103170394897461, 67.60258483886719, -155.01666259765625, 57.500152587890625, 177.12904357910156, 13.903961181640625, 206.81185913085938, 6.31011962890625, 14.638389587402344, 49.923248291015625, 38.96046447753906, 51.10169982910156, 36.346527099609375, 111.28939819335938, 10.666830062866211, 159.93075561523438, 32.538536071777344, 174.74874877929688, -12.417205810546875, 95.776123046875, 130.5775146484375, 154.85684204101562, 19.068679809570312, 100.70252990722656, 32.09222412109375, 64.67071533203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000207.npy"}
{"epoch": 0.43350785340314135, "step": 208, "batch_size": 128, "mean": 58.209503173828125, "std": 89.01133728027344, "min": -171.49249267578125, "p10": -28.182342910766597, "median": 46.80001449584961, "p90": 163.8144500732422, "max": 514.925048828125, "pos_frac": 0.7109375, "sample": [44.16387939453125, 56.5989990234375, -30.965469360351562, 59.32481384277344, 83.38864135742188, 118.86392211914062, -18.687400817871094, 163.59799194335938, 142.6724853515625, 32.21621322631836, 1.12548828125, 282.27886962890625, 0.0, 180.3310546875, -59.623992919921875, -5.816398620605469, -5.38043212890625, -0.6201019287109375, 51.86639404296875, 32.622947692871094, 179.73065185546875, 15.2103271484375, 5.389007568359375, -10.063568115234375, -13.22412109375, 212.8043212890625, 157.3316650390625, 123.4852294921875, 101.12974548339844, 40.384765625, 23.161865234375, -126.9447021484375, -77.6884765625, -7.35302734375, -4.098819732666016, 142.94522094726562, -114.3067626953125, 81.24217224121094, 42.131103515625, 44.5653076171875, -171.49249267578125, 158.43045043945312, -12.773483276367188, 127.0822525024414, 20.960006713867188, -31.7652587890625, 47.54524230957031, 173.472900390625, 46.510772705078125, 133.6088409423828, 514.925048828125, 0.0, 20.898406982421875, 12.625520706176758, -3.42327880859375, 30.464332580566406, 5.9874114990234375, 32.15074920654297, -17.30804443359375, -34.34006118774414, -15.31903076171875, 42.59271240234375, -49.577880859375, 113.94189453125, -31.3453369140625, 194.4296875, -26.989574432373047, 8.737213134765625, 12.987211227416992, -91.48236083984375, 30.006698608398438, 81.409912109375, 97.52569580078125, 111.42184448242188, 192.7874755859375, 91.16087341308594, 22.993896484375, -7.1939697265625, -1.9122314453125, 164.31951904296875, 64.81952667236328, 135.8983154296875, 75.862548828125, 105.20510864257812, 49.35398864746094, 73.35924530029297, -8.845924377441406, 139.8123779296875, 38.32670593261719, 58.05712890625, -34.44671630859375, 51.157257080078125, 196.90460205078125, 69.6004638671875, 19.072715759277344, 57.124176025390625, 103.80618286132812, 67.67340087890625, 40.843162536621094, 262.1527404785156, 134.084228515625, -7.53851318359375, 86.88177490234375, 120.438720703125, 47.089256286621094, -17.42102813720703, 251.37103271484375, 81.65887451171875, -2.531097412109375, 96.37333679199219, -1.0546798706054688, 129.994873046875, -7.697959899902344, 194.74417114257812, 99.725341796875, 152.1646728515625, 1.3182754516601562, 13.415145874023438, 61.74073791503906, 54.429656982421875, 53.524566650390625, 50.489593505859375, 140.6006317138672, 64.307861328125, 149.1884765625, -79.15350341796875, -6.34393310546875, 85.43597412109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000208.npy"}
{"epoch": 0.4356020942408377, "step": 209, "batch_size": 128, "mean": 41.179481506347656, "std": 87.31407928466797, "min": -205.751220703125, "p10": -47.93343963623047, "median": 29.8062744140625, "p90": 158.5225402832031, "max": 264.12841796875, "pos_frac": 0.6171875, "sample": [167.81771850585938, 89.36370849609375, 36.037872314453125, 120.89202880859375, -19.002609252929688, -124.51918029785156, -114.1900634765625, -22.428619384765625, -163.37429809570312, 11.353036880493164, 7.520660400390625, 107.26507568359375, -25.99859619140625, 142.95751953125, 166.07571411132812, 74.47164916992188, 131.71990966796875, -48.72889709472656, -14.6220703125, 45.0858154296875, 45.32196044921875, 121.21578979492188, 197.77847290039062, -128.63226318359375, -8.978057861328125, -9.632049560546875, 48.935203552246094, -22.356582641601562, 100.71566772460938, 65.21156311035156, -33.854427337646484, 160.88055419921875, -24.22802734375, -9.924301147460938, 74.66171264648438, -205.751220703125, 243.03439331054688, -9.585739135742188, 0.0, 3.68798828125, 124.32220458984375, 60.0823974609375, -34.25254821777344, -90.54124450683594, 100.716552734375, 9.249786376953125, -5.3167724609375, 7.079023361206055, 122.79461669921875, 118.46661376953125, 43.33251953125, 169.54464721679688, -19.11651611328125, 7.98931884765625, -47.592529296875, -15.656356811523438, -14.958969116210938, 173.81451416015625, 86.59996032714844, -38.291603088378906, 264.12841796875, 50.6861572265625, 39.60588073730469, 175.49737548828125, -66.49014282226562, 32.5550537109375, 156.50640869140625, -20.014984130859375, -32.28289794921875, 51.04693603515625, 148.13958740234375, -41.68499755859375, 45.6497802734375, 12.048606872558594, 5.745546340942383, 33.94903564453125, -3.8924560546875, 7.3508148193359375, 118.96836853027344, -7.235441207885742, 24.191055297851562, 145.99209594726562, 65.4947509765625, 0.0, -10.731643676757812, -126.81932067871094, -4.1819610595703125, 17.717254638671875, -44.730560302734375, -63.5369873046875, 38.77836608886719, -16.697906494140625, 182.1214599609375, -102.36712646484375, 109.22850799560547, 92.5875244140625, 28.933197021484375, 14.327983856201172, 162.82073974609375, 156.34933471679688, 30.679351806640625, 44.61102294921875, 9.785865783691406, 11.44461441040039, -56.618927001953125, 109.38702392578125, 127.9478759765625, -24.680984497070312, 99.41293334960938, -123.3187255859375, -3.949432373046875, 200.02197265625, 63.5450439453125, 114.60342407226562, -19.2857666015625, 67.71812438964844, 138.43331909179688, 150.20907592773438, 98.03506469726562, 106.17529296875, -17.574798583984375, 126.12570190429688, -7.358816146850586, -13.43988037109375, 184.07797241210938, 152.87677001953125, -29.618408203125, 157.511962890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000209.npy"}
{"epoch": 0.437696335078534, "step": 210, "batch_size": 128, "mean": 50.231658935546875, "std": 83.96781158447266, "min": -202.12188720703125, "p10": -38.55639038085937, "median": 42.502113342285156, "p90": 162.9575256347656, "max": 290.88861083984375, "pos_frac": 0.7109375, "sample": [2.821380615234375, -12.36328125, 52.61358642578125, -8.90679931640625, 17.64215087890625, -12.929168701171875, 165.564453125, 90.78530883789062, 55.25190734863281, 28.89862823486328, 57.62439727783203, -10.9735107421875, 170.14825439453125, 19.705453872680664, 86.0128173828125, -10.84088134765625, -23.783660888671875, 148.92799377441406, -71.2108154296875, -16.038711547851562, -36.06141662597656, 228.74032592773438, 3.38580322265625, 83.3363037109375, -11.238643646240234, 50.7811279296875, -6.3527984619140625, 242.841064453125, 3.3884124755859375, 99.31671142578125, 31.460250854492188, -143.3226776123047, 143.70724487304688, 31.843658447265625, 147.218017578125, 167.68719482421875, 11.370765686035156, 106.21929931640625, -2.3873062133789062, 71.535888671875, 15.643917083740234, 65.40229797363281, -67.54928588867188, -14.895553588867188, 126.19293212890625, 181.85479736328125, 80.77654266357422, 82.84652709960938, 127.41412353515625, 90.529296875, -114.01007080078125, -58.96961975097656, 132.14654541015625, 225.93069458007812, 48.319007873535156, 91.75430297851562, -3.9860000610351562, 41.344879150390625, 228.3087158203125, 30.127593994140625, 17.68701171875, 183.64950561523438, 42.18145751953125, -67.77540588378906, 138.09185791015625, 76.51158142089844, 71.48165893554688, -5.815662384033203, 35.19935607910156, 62.72021484375, 14.333101272583008, -32.9774169921875, 36.45295715332031, 30.851402282714844, -0.3955078125, 290.88861083984375, 75.91094970703125, -202.12188720703125, 118.29962158203125, -14.76153564453125, 123.81967163085938, 42.82276916503906, 161.84027099609375, 251.16677856445312, 166.41949462890625, 89.96884155273438, 64.74179077148438, 33.964874267578125, -10.53680419921875, 8.295501708984375, -55.30628967285156, 12.702926635742188, 103.31085205078125, -64.0555419921875, 84.51982879638672, 61.5894775390625, 8.216384887695312, 87.12466430664062, 85.20684814453125, 7.889991760253906, 69.1850357055664, 60.375396728515625, 98.47555541992188, -4.6694488525390625, -148.589599609375, 12.063018798828125, 83.46392822265625, 72.36650085449219, 84.0601806640625, 82.8365478515625, 205.72906494140625, 131.74449157714844, -2.286968231201172, 142.5941162109375, -40.084564208984375, -118.35403442382812, -12.454864501953125, -49.60894775390625, 91.93196105957031, 113.69192504882812, 2.2608489990234375, -16.118988037109375, 1.6368560791015625, 25.034912109375, -37.901458740234375, -7.844024658203125, 65.76469421386719, 128.63568115234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000210.npy"}
{"epoch": 0.4397905759162304, "step": 211, "batch_size": 128, "mean": 65.2879867553711, "std": 77.00318145751953, "min": -151.18856811523438, "p10": -11.447937393188472, "median": 49.39424133300781, "p90": 164.0919281005859, "max": 251.6435546875, "pos_frac": 0.796875, "sample": [-6.182853698730469, 64.69676208496094, 200.5313720703125, 158.56695556640625, 6.3049468994140625, 56.217323303222656, 78.18110656738281, 132.69485473632812, 13.059883117675781, -1.0102272033691406, -151.18856811523438, -57.284637451171875, 33.53416442871094, 18.00463104248047, 127.2510986328125, 14.70465087890625, -9.224319458007812, -7.891143798828125, -22.58953857421875, 135.06170654296875, 228.4573211669922, 215.48001098632812, 19.54831886291504, 157.34188842773438, 87.47994995117188, 83.37608337402344, 64.95474243164062, 8.921142578125, 208.685302734375, 64.81796264648438, 102.49748992919922, 51.574493408203125, 143.0821990966797, 131.6937713623047, 27.923606872558594, 16.637598037719727, 91.30809020996094, -14.526531219482422, -33.154388427734375, 157.75714111328125, 114.98616027832031, 77.50320434570312, 31.92694091796875, -5.7859344482421875, -5.170684814453125, 5.7919464111328125, 251.6435546875, 87.30317687988281, 133.5623779296875, 1.2712574005126953, -50.3267822265625, 0.1334228515625, 118.1090087890625, 118.95291137695312, 101.31777954101562, 17.041061401367188, 136.80303955078125, 158.46688842773438, 15.978668212890625, -3.250030517578125, 201.5272216796875, 109.44631958007812, 169.63076782226562, 36.46148681640625, -3.74365234375, 87.1866455078125, 56.00071716308594, -14.657501220703125, 15.977783203125, 14.846221923828125, 19.741790771484375, 39.654449462890625, 43.46814727783203, 81.33041381835938, 46.243865966796875, -65.32138061523438, 117.3292236328125, -70.55332946777344, 66.42041015625, 85.03643798828125, -3.0032081604003906, 27.483720779418945, 3.4619674682617188, 126.81407928466797, -126.1557846069336, 17.30364227294922, 66.67449951171875, 49.31622314453125, 32.62053680419922, -3.7271652221679688, 11.342758178710938, 203.140625, 47.54322814941406, 78.96817779541016, 10.842987060546875, 86.38325500488281, 49.472259521484375, 45.90643310546875, -18.428268432617188, 212.46212768554688, -18.68951416015625, 220.43060302734375, 96.593994140625, 106.33929443359375, 39.94349670410156, -10.1219482421875, -15.46942138671875, 161.7181396484375, -10.1285400390625, 44.2437744140625, 45.06526184082031, 247.86614990234375, 145.62307739257812, 44.389892578125, 186.66336059570312, 125.98275756835938, 202.11068725585938, 117.63467407226562, 90.071533203125, 9.988117218017578, 133.5640869140625, -4.381439208984375, 154.6895751953125, 35.838783264160156, 20.230079650878906, 140.51817321777344, 56.879974365234375, 131.26744079589844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000211.npy"}
{"epoch": 0.4418848167539267, "step": 212, "batch_size": 128, "mean": 40.951171875, "std": 79.04873657226562, "min": -210.22872924804688, "p10": -44.502467346191395, "median": 34.03138732910156, "p90": 147.85140991210938, "max": 260.5467529296875, "pos_frac": 0.734375, "sample": [-0.020877838134765625, 56.334228515625, 30.955154418945312, -6.9658966064453125, 28.861312866210938, -4.7080841064453125, 36.37546157836914, 74.9549560546875, 11.642290115356445, 125.4593505859375, 112.93946838378906, 86.76718139648438, -90.79783630371094, -110.81816101074219, 41.65009307861328, 62.4632568359375, 21.4949951171875, 125.99203491210938, -30.80402374267578, 90.58868408203125, 11.4010009765625, 85.4952163696289, 7.261405944824219, 57.97686767578125, -27.740509033203125, 114.643310546875, 15.807205200195312, 49.89874267578125, 37.967071533203125, 13.785276412963867, 55.260986328125, -33.79273986816406, -8.563095092773438, -31.580764770507812, -10.4754638671875, 6.1233062744140625, 146.67816162109375, 210.9727020263672, 8.118392944335938, 90.68115234375, 9.28662109375, 33.569061279296875, -68.03036499023438, 27.6182861328125, -155.63323974609375, 113.35455322265625, 75.75547790527344, -19.83447265625, 62.505523681640625, 164.97030639648438, 64.26679992675781, 171.3037872314453, 79.40587615966797, 240.14642333984375, 102.6610107421875, 1.9118499755859375, -136.8968048095703, 36.937957763671875, 70.01751708984375, -51.969757080078125, 56.115386962890625, -10.427619934082031, 25.894489288330078, 14.39678955078125, 49.56878662109375, -72.58059692382812, 138.13922119140625, -11.733535766601562, 117.33419799804688, -143.5733642578125, 6.710418701171875, 3.6926746368408203, 104.34709167480469, 64.6146240234375, 74.1890869140625, -25.99413299560547, -55.31340789794922, 198.15692138671875, 20.181060791015625, -5.581878662109375, -73.64747619628906, 7.335357666015625, 175.7078094482422, 29.017166137695312, 49.93359375, -210.22872924804688, 57.035614013671875, 70.65298461914062, -2.168212890625, 2.65185546875, 131.43255615234375, 150.5889892578125, 34.49371337890625, 44.91505432128906, -2.13433837890625, 156.98704528808594, 176.23745727539062, 1.1537055969238281, 90.007568359375, 1.6021881103515625, -39.868194580078125, 130.77088928222656, 42.973876953125, 93.7567138671875, 88.95869445800781, 23.495407104492188, 99.000732421875, -77.0712890625, -31.115097045898438, 5.966352462768555, 161.56036376953125, 18.837020874023438, 8.036125183105469, 140.26898193359375, -27.602867126464844, 181.49215698242188, 74.35848999023438, -20.439102172851562, 76.37696838378906, -55.144866943359375, 44.943511962890625, -41.30220031738281, 156.2310791015625, 29.451385498046875, 11.8984375, 105.03666687011719, 260.5467529296875, 56.994659423828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000212.npy"}
{"epoch": 0.44397905759162304, "step": 213, "batch_size": 128, "mean": 44.97577667236328, "std": 91.84402465820312, "min": -253.97622680664062, "p10": -62.95960388183594, "median": 33.845130920410156, "p90": 155.30592956542966, "max": 416.0152587890625, "pos_frac": 0.7265625, "sample": [73.70355224609375, 112.55838012695312, 28.073455810546875, 102.924560546875, -39.46315002441406, -18.732986450195312, 97.97859954833984, 3.5848388671875, -8.774368286132812, 11.9378662109375, 37.672698974609375, 51.233642578125, -21.98626708984375, 54.491973876953125, 6.648433685302734, 181.98312377929688, 25.96160888671875, 68.3587646484375, 216.372802734375, -19.894866943359375, 60.182403564453125, -46.21392822265625, -62.6827392578125, 416.0152587890625, -13.523002624511719, 10.58090591430664, 1.521585464477539, -63.605621337890625, 60.37640380859375, 169.021728515625, 11.492422103881836, -98.23822021484375, 20.973236083984375, 103.27822875976562, 128.02322387695312, 93.64935302734375, 46.120948791503906, 43.31396484375, 34.804168701171875, -91.26473999023438, 56.4039306640625, -3.709014892578125, 204.49423217773438, -34.3331298828125, 153.64480590820312, -0.25280189514160156, 114.00796508789062, 3.937837600708008, -89.92019653320312, -12.339691162109375, 129.49891662597656, -79.853271484375, -9.9962158203125, 24.646926879882812, 139.21461486816406, 119.26148986816406, -22.602272033691406, 101.61566162109375, -97.0552978515625, 73.05767822265625, 111.99603271484375, 1.3168468475341797, -19.635353088378906, 47.45556640625, 16.3642578125, 159.181884765625, 103.49293518066406, 55.300628662109375, 112.58460998535156, 84.48164367675781, 45.81245422363281, 43.69837951660156, 289.5833740234375, 58.6395263671875, 8.746734619140625, 114.04605102539062, -19.946609497070312, 98.30204010009766, -36.178131103515625, 166.47210693359375, 79.66757202148438, -99.47962951660156, 1.5015869140625, 3.7653884887695312, -71.14236450195312, 171.28399658203125, 97.99737548828125, -36.907020568847656, 4.4822998046875, 148.79364013671875, 18.19824981689453, 217.5467987060547, 8.858509063720703, 65.256103515625, -66.79396057128906, 33.64613342285156, 46.072906494140625, 29.828887939453125, 8.212278366088867, 187.7052001953125, -126.60003662109375, 223.92816162109375, 72.50009155273438, 13.42755126953125, 34.7828369140625, 20.350112915039062, 42.39128112792969, -253.97622680664062, 12.25274658203125, 352.50732421875, -0.136260986328125, 5.65936279296875, 97.37623596191406, 140.58810424804688, -1.5032882690429688, 83.09024047851562, 21.13946533203125, 0.0, 8.505382537841797, 137.42318725585938, 76.09378814697266, 34.04412841796875, 51.58984375, -109.55593872070312, 74.07803344726562, 14.083091735839844, -48.84516906738281, -66.6844711303711], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000213.npy"}
{"epoch": 0.44607329842931936, "step": 214, "batch_size": 128, "mean": 52.398353576660156, "std": 77.66922760009766, "min": -171.11798095703125, "p10": -35.681589126586914, "median": 45.625946044921875, "p90": 152.0353805541992, "max": 314.1441650390625, "pos_frac": 0.7734375, "sample": [48.69683837890625, 24.746307373046875, 4.2967681884765625, 11.121826171875, 11.2423095703125, -36.83281707763672, 98.86332702636719, 30.54833984375, -54.25738525390625, 106.8079605102539, 28.79962158203125, 61.870941162109375, 32.360260009765625, -3.3671302795410156, 87.13511657714844, 88.90386962890625, 53.34373474121094, 1.9093170166015625, 53.00596618652344, -91.42696380615234, 10.506881713867188, 63.8851318359375, -22.79302978515625, 36.717803955078125, -55.89778137207031, -171.11798095703125, 144.54193115234375, 132.513671875, 60.0968017578125, 32.91973876953125, 45.504364013671875, 263.2689208984375, 85.54663848876953, 147.55984497070312, 62.606353759765625, -28.462310791015625, 314.1441650390625, 190.00094604492188, 120.93861389160156, 7.1559906005859375, 4.312080383300781, -70.15780639648438, 28.149917602539062, 33.578433990478516, -48.348968505859375, 149.66590881347656, 3.8491744995117188, 159.8355255126953, -27.395782470703125, 0.0, 23.943557739257812, 62.595703125, 145.61767578125, 123.20794677734375, 28.220306396484375, 67.40156555175781, 85.50132751464844, -53.8397216796875, 137.31407165527344, -5.962615966796875, 131.75340270996094, 45.747528076171875, -18.416259765625, 18.3284912109375, 18.3369140625, -28.026168823242188, 77.4996337890625, 136.071044921875, 63.70664978027344, -90.83578491210938, -63.28279113769531, 47.273162841796875, 190.818359375, 66.95634460449219, 44.641822814941406, -57.97314453125, 12.8372802734375, -1.1717529296875, -20.728759765625, 71.98406982421875, 33.099334716796875, 24.307281494140625, 132.26397705078125, 19.92882537841797, -15.151031494140625, 223.12869262695312, 211.59063720703125, 167.05508422851562, 8.117431640625, 22.442855834960938, -73.1221923828125, 70.8390121459961, 67.38494110107422, 23.128742218017578, 73.12557983398438, 46.94197082519531, 91.7618408203125, 96.6971435546875, -20.5335693359375, 103.75772094726562, 54.67315673828125, -33.79096221923828, -4.1456298828125, 202.6151123046875, 78.07080078125, 64.76839447021484, 3.7194366455078125, 28.675811767578125, 56.82147216796875, 157.56414794921875, 66.89646911621094, 5.9183349609375, 13.102432250976562, -35.18820571899414, 119.84281921386719, 101.62645721435547, -53.3516845703125, 56.7509765625, 217.4315185546875, 52.99212646484375, 159.34103393554688, 142.80316162109375, 136.7518310546875, 22.963760375976562, -7.007162094116211, 181.59124755859375, 89.76724243164062, 28.607666015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000214.npy"}
{"epoch": 0.4481675392670157, "step": 215, "batch_size": 128, "mean": 39.17930603027344, "std": 90.61285400390625, "min": -234.65130615234375, "p10": -66.40779724121094, "median": 18.46742820739746, "p90": 156.81439819335935, "max": 369.21417236328125, "pos_frac": 0.6484375, "sample": [45.031009674072266, -27.243881225585938, 25.0404052734375, 163.85357666015625, -15.69354248046875, -234.65130615234375, 9.276424407958984, 40.64410400390625, 132.15806579589844, 199.77703857421875, 369.21417236328125, 97.71660614013672, 161.0469970703125, 41.011383056640625, 1.6621627807617188, 304.1722412109375, 73.86050415039062, 13.482513427734375, -69.50100708007812, -6.236839294433594, -15.793462753295898, 34.44482421875, 105.12562561035156, 213.06912231445312, 27.800521850585938, 37.518348693847656, 42.424530029296875, -15.159149169921875, 38.69755554199219, 37.536529541015625, 83.99836730957031, 58.31782531738281, 213.22833251953125, 99.630859375, 105.6583251953125, 42.43914031982422, 91.60813903808594, -51.03192901611328, -12.7664794921875, 176.33770751953125, 143.46725463867188, -6.653656005859375, 0.0, -69.67340087890625, 182.95086669921875, -8.28045654296875, -51.724761962890625, -5.719837188720703, 153.88571166992188, 43.34442138671875, -1.9458160400390625, 28.08381462097168, -66.43084716796875, 107.32877349853516, -16.968482971191406, 2.7282485961914062, -69.5450439453125, -8.687156677246094, 101.95781707763672, 26.382293701171875, -1.756988525390625, 60.01966857910156, -122.99468994140625, -1.6488037109375, 91.80191040039062, -4.9192657470703125, 155.00042724609375, 133.6727294921875, -10.455078125, 6.268798828125, 1.78900146484375, -28.880935668945312, 2.305011749267578, 6.718006134033203, 67.50874328613281, 119.75982666015625, 56.19117736816406, -39.859619140625, 0.0, 18.528152465820312, -3.2366180419921875, 143.3797607421875, -109.19384765625, 108.424560546875, -112.92881774902344, -3.959869384765625, 11.4332275390625, 64.17478942871094, 127.27951049804688, -109.77505493164062, -22.383697509765625, 13.512176513671875, 100.9654312133789, -55.484344482421875, 72.80615234375, 125.56256103515625, 238.658447265625, -81.85882568359375, 13.586456298828125, 52.891357421875, 5.415987014770508, 71.73091125488281, -66.39791870117188, 17.352310180664062, 250.15365600585938, 5.205711364746094, 161.37078857421875, 14.328109741210938, -20.468536376953125, 94.2930908203125, 73.22018432617188, -38.356414794921875, 36.34033203125, -138.68507385253906, 18.40670394897461, 67.3485107421875, -8.702880859375, 152.94830322265625, 29.916107177734375, 3.746145248413086, -73.32508850097656, -7.415771484375, 15.41571044921875, 17.797698974609375, 25.368560791015625, -67.98477172851562, -63.0357666015625, 233.85824584960938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000215.npy"}
{"epoch": 0.450261780104712, "step": 216, "batch_size": 128, "mean": 52.98881912231445, "std": 93.02864837646484, "min": -198.72122192382812, "p10": -46.968142700195315, "median": 49.787506103515625, "p90": 162.39020690917968, "max": 390.502197265625, "pos_frac": 0.734375, "sample": [-20.19591522216797, 123.02806091308594, 154.75143432617188, 84.01881408691406, -63.27874755859375, 11.54833984375, 65.71622467041016, 24.807512283325195, 390.502197265625, 5.722007751464844, 94.81515502929688, 119.21498107910156, -3.7685012817382812, 34.07941436767578, 58.77099609375, -13.399200439453125, 54.018035888671875, 91.8812255859375, 235.175048828125, -107.72409057617188, 34.87060546875, -12.013290405273438, 107.72184753417969, 50.63250732421875, -47.22015380859375, -28.197006225585938, 70.205322265625, 28.9918212890625, 67.21243286132812, 80.99151611328125, 123.01937866210938, 69.53746032714844, 22.8062744140625, 133.2154541015625, -151.99411010742188, 51.036773681640625, 159.56546020507812, -198.72122192382812, 237.3760986328125, 173.09320068359375, 49.857666015625, -121.23583984375, -0.2774505615234375, -19.403526306152344, 168.63583374023438, 155.8939666748047, 3.5715560913085938, 57.223175048828125, -89.07940673828125, 47.79840087890625, 109.4779052734375, 95.80157470703125, 217.37677001953125, 82.34248352050781, 0.0, 106.42657470703125, 226.5279541015625, 86.0440673828125, -41.51939392089844, -31.935516357421875, 19.133087158203125, 41.464111328125, 7.0264892578125, 287.2236022949219, 48.352813720703125, -6.055564880371094, 161.02780151367188, 112.97308349609375, -23.044036865234375, 221.19154357910156, 110.15814208984375, 165.56915283203125, 6.202545166015625, 23.27923583984375, -83.33642578125, 22.344375610351562, 30.404449462890625, 184.80191040039062, -30.49285888671875, 14.644721984863281, -101.3212890625, 17.8363037109375, 150.88336181640625, 209.45108032226562, 130.04153442382812, -113.14854431152344, 2.9089813232421875, 102.58391571044922, 134.443115234375, 192.3603515625, -90.78726196289062, -16.87847900390625, 49.71734619140625, -171.97842407226562, 99.54597473144531, 75.18614196777344, 53.4111328125, -7.884773254394531, 50.993743896484375, 99.47286987304688, 83.38360595703125, 139.929931640625, 56.63427734375, 100.16220092773438, 26.725784301757812, 131.0701904296875, 11.725128173828125, -131.5070037841797, 0.0, 31.977783203125, -46.860137939453125, 139.25680541992188, -0.919677734375, 160.180419921875, 79.515380859375, 29.60382080078125, 94.37815856933594, -5.033966064453125, 1.0499095916748047, 93.4234619140625, 13.07403564453125, 114.76580810546875, -13.31341552734375, 62.748291015625, 7.9473876953125, 4.3526611328125, -25.904510498046875, 29.1573486328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000216.npy"}
{"epoch": 0.4523560209424084, "step": 217, "batch_size": 128, "mean": 65.11129760742188, "std": 91.91735076904297, "min": -168.820556640625, "p10": -48.651957702636714, "median": 61.01531982421875, "p90": 175.75408935546872, "max": 357.0836181640625, "pos_frac": 0.7734375, "sample": [38.775657653808594, 34.937042236328125, -58.490478515625, 57.03578186035156, -21.774261474609375, 263.4599609375, 264.80987548828125, -15.05169677734375, 28.485511779785156, 83.46310424804688, 136.99029541015625, -23.66656494140625, 26.06427001953125, 28.4962158203125, 11.25283432006836, -16.62213897705078, -51.48223876953125, -2.002777099609375, 53.86900329589844, 131.45736694335938, -0.06015777587890625, -5.7618560791015625, 4.880149841308594, 107.92315673828125, 31.796119689941406, 152.30621337890625, 26.350242614746094, -5.363735198974609, 29.0931396484375, 122.6927490234375, 80.94833374023438, 35.71980285644531, 69.64752197265625, -31.693756103515625, 58.52516174316406, 286.2824401855469, 220.98544311523438, 125.22088623046875, 167.416748046875, 60.560791015625, 101.39671325683594, 48.292999267578125, 93.64707946777344, 125.05313110351562, -52.132598876953125, 70.5775146484375, 124.89274597167969, -96.71475219726562, 8.996580123901367, 40.93516540527344, 148.39801025390625, 169.59788513183594, 35.4814453125, 0.0, 14.5782470703125, 83.59474182128906, -114.98605346679688, 184.19174194335938, 142.43026733398438, 214.61749267578125, 125.81056213378906, 43.15887451171875, -168.820556640625, -104.61421203613281, -94.69989013671875, 78.4278564453125, 122.32029724121094, 9.527587890625, -121.00436401367188, 137.44989013671875, 144.82461547851562, 166.167236328125, 357.0836181640625, 4.1131744384765625, 77.27839660644531, 69.27035522460938, 55.6513671875, 152.88333129882812, 69.81292724609375, -47.43898010253906, 135.06581115722656, -5.5083770751953125, 179.40768432617188, -61.04338073730469, -131.3509521484375, 182.7513427734375, 126.474609375, 13.725738525390625, 3.81817626953125, -116.04541015625, 61.4698486328125, 117.67681884765625, 99.51596069335938, -71.48902893066406, 96.52163696289062, 0.828887939453125, 76.07926940917969, 106.5338134765625, 3.56951904296875, 207.97203063964844, -7.8501129150390625, 105.15859985351562, 95.72736358642578, -7.4414215087890625, 80.0111083984375, 186.8753662109375, 48.41741943359375, 169.961181640625, 96.80635070800781, 68.974853515625, 21.151153564453125, 141.68817138671875, 103.04742431640625, -20.63488006591797, 12.528900146484375, 214.50372314453125, 84.33062744140625, 271.69610595703125, 114.91217041015625, 3.068145751953125, 56.210723876953125, 174.18826293945312, 123.91671752929688, 98.3572998046875, 110.82135009765625, -17.036056518554688, 49.55633544921875, 1.7984275817871094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000217.npy"}
{"epoch": 0.4544502617801047, "step": 218, "batch_size": 128, "mean": 56.62894058227539, "std": 88.73031616210938, "min": -177.05679321289062, "p10": -42.69855041503906, "median": 47.547752380371094, "p90": 155.39459686279298, "max": 401.69134521484375, "pos_frac": 0.765625, "sample": [40.85650634765625, 32.67042541503906, 46.3162841796875, 150.76022338867188, 3.0904922485351562, 155.89202880859375, 39.45599365234375, 129.97918701171875, -48.847137451171875, 148.81776428222656, 15.658554077148438, 127.04658508300781, -0.915435791015625, 241.46414184570312, 147.46168518066406, 16.6650390625, 0.0, 87.43478393554688, 98.28329467773438, 7.2242431640625, 164.0249786376953, 280.17626953125, -119.3359375, 77.3572998046875, 138.69244384765625, 162.519287109375, 193.525146484375, 9.29608154296875, 88.19586181640625, 67.027587890625, -65.03594970703125, 149.39935302734375, -81.05079650878906, 50.29402160644531, 9.170928955078125, 401.69134521484375, 115.5375747680664, -111.96524047851562, 184.45159912109375, 121.34979248046875, -7.016300201416016, 115.51176452636719, 152.09854125976562, 135.4468536376953, 51.1658935546875, -2.6999740600585938, 97.22183227539062, 3.9311561584472656, -20.649520874023438, 122.00265502929688, 110.2486572265625, 92.14892578125, 16.610870361328125, 74.93118286132812, -19.460594177246094, 88.475830078125, 56.167938232421875, 61.9886474609375, -43.84574890136719, -27.2677001953125, 123.3487548828125, 37.1407470703125, -19.979049682617188, -26.170074462890625, 0.5953369140625, 35.86331558227539, -118.5805435180664, 9.089462280273438, 186.29998779296875, -5.358730316162109, 32.378662109375, 98.91736602783203, 32.04266357421875, 3.925048828125, -15.89410400390625, -6.2305908203125, 264.2740478515625, 91.31344604492188, 104.4505615234375, -83.68145751953125, 34.00164794921875, 48.77922058105469, 150.96295166015625, 67.92465209960938, 71.64642333984375, 55.4620361328125, -115.36473083496094, 92.39541625976562, 124.40771484375, 49.0753173828125, -42.20689392089844, 81.84916687011719, 2.34979248046875, 25.948055267333984, 22.47882080078125, -12.030670166015625, 155.18141174316406, 135.59844970703125, 123.81912231445312, -99.15155029296875, 19.515823364257812, 115.96296691894531, -4.397144317626953, -117.66342163085938, -5.93756103515625, 161.99545288085938, 116.09390258789062, 247.57110595703125, 26.13616943359375, 9.826812744140625, 14.078132629394531, 13.775924682617188, -84.1340103149414, 17.463241577148438, 32.9237060546875, 52.15315246582031, 9.967803955078125, 96.12274169921875, 30.060791015625, 135.218994140625, -7.46075439453125, -177.05679321289062, 110.60494995117188, 61.03778839111328, 80.5162353515625, 177.51922607421875, 38.374114990234375, 29.7161865234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000218.npy"}
{"epoch": 0.45654450261780105, "step": 219, "batch_size": 128, "mean": 59.329734802246094, "std": 94.761962890625, "min": -280.3489990234375, "p10": -40.26356391906737, "median": 51.5731315612793, "p90": 202.37420349121092, "max": 339.2312927246094, "pos_frac": 0.7734375, "sample": [-165.4896240234375, 59.958892822265625, 53.695167541503906, 88.92082214355469, -145.05996704101562, 71.7215805053711, 10.507080078125, 43.42909240722656, 110.99111938476562, -27.009567260742188, 27.842559814453125, 167.134033203125, 119.16534423828125, 207.60226440429688, 0.0, 37.587158203125, -16.64028549194336, -11.494354248046875, -2.298553466796875, 103.09272766113281, 120.822509765625, 74.40487670898438, 71.29806518554688, 173.98617553710938, 52.28636932373047, 192.69456481933594, 186.73583984375, 107.162841796875, 66.0048828125, -280.3489990234375, 208.2989501953125, 225.6693115234375, 200.13360595703125, 66.38525390625, -49.027099609375, 83.50900268554688, -52.703887939453125, 129.55325317382812, 77.95199584960938, 144.60751342773438, 23.315414428710938, 10.119924545288086, 46.25244140625, 143.8629150390625, 14.276458740234375, 6.4463653564453125, -34.046897888183594, 50.859893798828125, 8.827606201171875, 123.99269104003906, 13.43408203125, 68.06515502929688, -11.658592224121094, 211.40283203125, 258.70013427734375, -109.28092956542969, 34.69496154785156, -51.59039306640625, 191.27081298828125, 0.020341873168945312, 29.100112915039062, 107.580810546875, 165.42959594726562, 91.09992980957031, 151.61590576171875, 55.300537109375, 61.1937255859375, 31.804779052734375, 218.2275390625, 238.6900634765625, 18.484956741333008, -35.284515380859375, 92.35066986083984, 102.31927490234375, 55.35337829589844, 10.629335403442383, -27.619873046875, 125.00601196289062, 95.59896850585938, -5.7388763427734375, 30.601516723632812, 210.49545288085938, 77.75604248046875, -7.34967041015625, 216.13461303710938, 91.36540222167969, 60.45050048828125, 48.63047790527344, 252.78277587890625, 4.06396484375, 83.54446411132812, 75.09881591796875, 19.02301025390625, 2.4984130859375, 87.98419189453125, 63.19288635253906, -33.81201171875, -94.11605834960938, -6.243522644042969, 33.09587097167969, 35.289146423339844, -57.76416015625, -15.588226318359375, 64.95684814453125, 79.26467895507812, 21.160629272460938, -121.28648376464844, 43.74129104614258, 151.60446166992188, 229.499267578125, 11.21881103515625, -47.5018310546875, -84.23104858398438, 1.60272216796875, 68.01371765136719, 227.77178955078125, 88.92312622070312, 50.19581604003906, -37.16144943237305, 17.311676025390625, 16.730621337890625, 45.537841796875, 21.15660858154297, 183.54183959960938, 339.2312927246094, 15.280914306640625, -20.162574768066406, -62.516021728515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000219.npy"}
{"epoch": 0.4586387434554974, "step": 220, "batch_size": 128, "mean": 60.89457702636719, "std": 86.93477630615234, "min": -186.294677734375, "p10": -47.9242446899414, "median": 56.08006286621094, "p90": 172.10956726074218, "max": 318.90960693359375, "pos_frac": 0.75, "sample": [175.2730712890625, 318.90960693359375, 88.04373168945312, 89.29718017578125, -37.1068115234375, -24.886123657226562, 88.97171020507812, -5.550811767578125, 125.8739013671875, 171.24398803710938, 105.4552001953125, 14.434890747070312, 49.71331787109375, 35.9752197265625, 171.80908203125, 132.50332641601562, 14.549415588378906, -28.157852172851562, -8.59051513671875, 73.6514892578125, 205.0889892578125, 40.15907287597656, 78.10107421875, -0.6280059814453125, -51.691070556640625, 60.338401794433594, 40.409034729003906, 31.03509521484375, 87.86236572265625, -1.7608184814453125, 149.11083984375, -12.739013671875, -13.241533279418945, 13.304931640625, 19.96575927734375, 113.49542236328125, 23.114013671875, 58.313751220703125, 8.028696060180664, 185.0108642578125, 24.29138946533203, 58.045928955078125, 13.943775177001953, 8.555644989013672, -21.743255615234375, 265.38232421875, 50.56836700439453, 99.58037567138672, -61.71989440917969, 190.47726440429688, 156.65188598632812, 128.76882934570312, 135.90863037109375, 83.69336700439453, -86.77471923828125, -114.80911254882812, 83.23223876953125, 147.45254516601562, -62.21527099609375, 49.912322998046875, 146.788818359375, 98.84950256347656, 36.336483001708984, 103.30010986328125, 29.67888641357422, 110.66796875, 84.75022888183594, 41.299102783203125, 150.75949096679688, 127.06219482421875, 156.3983917236328, -33.37249755859375, 43.92327117919922, 145.57391357421875, 7.47021484375, 54.11419677734375, -25.1463623046875, 15.447402954101562, -10.9854736328125, 142.87326049804688, 23.89593505859375, 127.6387939453125, 104.14022827148438, 69.01496887207031, 69.82642364501953, -15.3997802734375, 210.10943603515625, -84.96188354492188, 224.13204956054688, 207.698486328125, -80.73947143554688, 29.341522216796875, -158.10665893554688, 45.4224853515625, 144.26242065429688, 60.79963684082031, 172.81069946289062, 154.55865478515625, 21.95263671875, 117.82943725585938, 199.17233276367188, 163.86041259765625, -33.61275863647461, -186.294677734375, -47.58892822265625, 18.111709594726562, 33.642616271972656, 18.837615966796875, -55.30995178222656, 90.85791015625, 162.46780395507812, 122.5731201171875, -94.948974609375, 66.91412353515625, 12.577634811401367, -1.611602783203125, 177.90643310546875, 191.47940063476562, 85.92678833007812, 67.28961181640625, -19.52105712890625, -50.26043701171875, 63.83978271484375, 134.59432983398438, 41.9278564453125, -23.08460235595703, 99.5574951171875, -48.70664978027344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000220.npy"}
{"epoch": 0.4607329842931937, "step": 221, "batch_size": 128, "mean": 49.04707336425781, "std": 95.79632568359375, "min": -195.13961791992188, "p10": -58.49180297851562, "median": 38.28412628173828, "p90": 180.02638549804684, "max": 315.3606872558594, "pos_frac": 0.6875, "sample": [3.241943359375, 89.3291015625, 12.7215576171875, -29.0638427734375, 298.707275390625, -160.00698852539062, -5.4884033203125, -11.091400146484375, 119.60562133789062, -61.40251922607422, 68.42061614990234, -6.4684295654296875, 199.8092803955078, 29.242095947265625, -1.3720321655273438, 153.78921508789062, 69.65838623046875, 86.85808563232422, 112.07632446289062, 86.10427856445312, -114.05830383300781, 148.00717163085938, -87.86492919921875, 122.46682739257812, 239.50125122070312, 120.7545166015625, -60.200103759765625, 37.657432556152344, -35.630218505859375, -54.236454010009766, -8.071060180664062, -3.3319854736328125, -16.3126220703125, 80.31866455078125, 17.235191345214844, 62.801116943359375, -29.59735107421875, 88.90130615234375, 2.8033447265625, 249.73468017578125, 85.93133544921875, -33.5045166015625, 315.3606872558594, -60.92677307128906, 34.0284423828125, 54.078369140625, 0.6033802032470703, 145.30670166015625, 1.0895767211914062, -11.079170227050781, 45.46746826171875, -47.22210693359375, 15.700180053710938, -151.57522583007812, -34.725990295410156, 188.067626953125, -22.757537841796875, 244.7464141845703, 12.42633056640625, 2.83038330078125, 146.14584350585938, 1.1591796875, 232.1376953125, -9.03271484375, -82.25053405761719, -19.059171676635742, -19.06574249267578, 200.5785675048828, -23.513214111328125, 73.24215698242188, -36.83683776855469, 42.94903564453125, 159.43450927734375, 222.8196258544922, 126.31210327148438, 211.96054077148438, 16.629180908203125, -12.153106689453125, 59.28742980957031, 128.40109252929688, 76.98013305664062, 133.2492218017578, 132.63821411132812, 51.82562255859375, 46.344512939453125, 72.4073486328125, 165.83428955078125, 176.58013916015625, 65.72235107421875, 100.11332702636719, 146.83782958984375, 123.74520874023438, 44.38414001464844, 106.36563110351562, -67.8302001953125, 233.555908203125, 15.17041015625, 67.2034912109375, -11.494216918945312, 80.587890625, -77.31230163574219, 12.51458740234375, 171.69480895996094, -54.725677490234375, 64.61866760253906, 61.87103271484375, 57.59375, 10.633880615234375, -30.2996826171875, 66.498779296875, 12.83935546875, 10.422698974609375, 199.11492919921875, 24.010284423828125, -57.759674072265625, -195.13961791992188, 64.82490539550781, 1.910400390625, 60.03645324707031, 138.56613159179688, 1.2010345458984375, 1.7352752685546875, -87.68975830078125, -3.6408615112304688, 8.9368896484375, 38.91082000732422, -157.71841430664062, 155.61602783203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000221.npy"}
{"epoch": 0.46282722513089003, "step": 222, "batch_size": 128, "mean": 50.875091552734375, "std": 88.1940689086914, "min": -224.08697509765625, "p10": -49.080451202392574, "median": 53.34274673461914, "p90": 178.69774780273437, "max": 253.6588134765625, "pos_frac": 0.7109375, "sample": [101.32632446289062, 2.7266845703125, 246.86477661132812, 109.42684936523438, 225.982666015625, -63.05342102050781, 8.327857971191406, -34.95558166503906, 52.920982360839844, 183.67022705078125, 82.08624267578125, -21.606300354003906, -224.08697509765625, 78.9552001953125, 15.13677978515625, 105.48987579345703, 61.748321533203125, 17.29638671875, 139.59579467773438, 102.200439453125, -35.43920135498047, 103.09330749511719, 62.12689208984375, 98.47406005859375, -8.008018493652344, 102.239501953125, -1.0061702728271484, 98.44476318359375, -100.29510498046875, 200.51547241210938, -17.154930114746094, 20.288986206054688, -30.07025146484375, -26.88555908203125, 52.890869140625, 80.41273498535156, -3.4515380859375, 55.5604248046875, 158.27674865722656, 231.18063354492188, 61.888267517089844, 16.133750915527344, -30.191131591796875, -20.71405029296875, 1.9736900329589844, 187.3946990966797, -101.33425903320312, 178.56146240234375, 106.14013671875, 27.043548583984375, 158.16151428222656, 131.06906127929688, 179.0157470703125, 62.43952941894531, 48.694305419921875, 53.76451110839844, -110.83137512207031, 12.592987060546875, 87.08999633789062, -38.5313720703125, -91.70599365234375, 192.3795166015625, -62.863677978515625, 83.03399658203125, 61.46356201171875, 34.919254302978516, 25.720550537109375, 253.6588134765625, -22.852645874023438, -81.90890502929688, 56.19384765625, -14.632987976074219, 43.47979736328125, 205.7894287109375, 70.01461791992188, -43.549285888671875, 59.99028015136719, 168.03790283203125, 1.6614265441894531, 11.05755615234375, -43.03228759765625, 123.34857177734375, -57.573883056640625, 96.792724609375, 180.76600646972656, -21.274192810058594, 50.98480224609375, 47.829010009765625, -47.45549011230469, 129.29702758789062, 164.21978759765625, 155.20034790039062, 54.7293701171875, 30.08270263671875, 106.82818603515625, 97.91792297363281, 196.86572265625, 14.085906982421875, 48.30908203125, -68.76618194580078, 201.6600341796875, 18.642236709594727, -41.00868225097656, 10.810020446777344, -157.9305877685547, 1.0475921630859375, -88.91197204589844, 69.36654663085938, 121.823486328125, 58.59584045410156, 70.59893798828125, 85.36578369140625, 125.79605102539062, -51.028175354003906, 141.5271453857422, -42.7127685546875, -32.436065673828125, 144.3272705078125, 61.256072998046875, 143.57955932617188, 89.31607818603516, 68.97412872314453, 12.485740661621094, -48.24571228027344, -26.46630859375, -7.142829895019531, 99.55990600585938, 32.512210845947266], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000222.npy"}
{"epoch": 0.4649214659685864, "step": 223, "batch_size": 128, "mean": 55.723541259765625, "std": 87.06228637695312, "min": -152.7905731201172, "p10": -38.7211944580078, "median": 53.985443115234375, "p90": 170.56266479492186, "max": 243.74270629882812, "pos_frac": 0.765625, "sample": [20.48851776123047, -36.52386474609375, 18.7235107421875, 95.220947265625, 108.63223266601562, 42.183441162109375, 146.14630126953125, 136.17478942871094, 197.175537109375, -108.87490844726562, 77.56520080566406, -133.40560913085938, 200.40243530273438, 105.23233032226562, 27.237091064453125, 17.04443359375, 27.205360412597656, 149.44107055664062, -0.9779052734375, 111.28604125976562, 169.278564453125, 2.270172119140625, 2.6636810302734375, 59.70550537109375, 124.18403625488281, -21.251373291015625, 3.7029495239257812, 71.5917739868164, 7.6234130859375, 34.51734924316406, 195.265625, 60.03803253173828, 56.51422119140625, -81.21630859375, -23.87078094482422, 28.05223274230957, 173.55889892578125, -123.09024047851562, 113.44901275634766, -3.9332427978515625, 117.8065185546875, 157.442138671875, 15.509063720703125, 63.15216064453125, 154.119140625, 232.48236083984375, 63.48969268798828, 118.84695434570312, 105.85176086425781, 19.3759765625, 68.8599853515625, 89.63815307617188, 9.422098159790039, 82.080078125, 55.384490966796875, -18.574094772338867, 212.76947021484375, 58.880226135253906, 39.626251220703125, -7.263938903808594, -124.23335266113281, -11.444625854492188, 104.99189758300781, 29.36273193359375, 86.52090454101562, 227.06924438476562, 57.027435302734375, 226.60546875, 9.944633483886719, 63.99847412109375, 36.58441162109375, 47.94835662841797, 47.65718078613281, -152.7905731201172, -6.1220550537109375, 88.8494873046875, -1.1036376953125, 185.03115844726562, 90.52456665039062, 94.64666748046875, -4.518280029296875, 148.83810424804688, -85.55704498291016, -21.435239791870117, 133.80374145507812, 1.589111328125, 114.09359741210938, 114.06063842773438, 102.01632690429688, -24.172706604003906, -27.75133514404297, 73.05677032470703, -133.7350311279297, 45.29890441894531, 102.36859130859375, 27.36297607421875, 37.39683532714844, 12.23394775390625, 47.42401123046875, 78.14450073242188, 180.64793395996094, 52.586395263671875, 143.9263458251953, 15.874176025390625, 78.03057861328125, -43.848297119140625, 108.37295532226562, 19.140228271484375, 41.722320556640625, 58.103878021240234, 82.0465087890625, 162.53515625, -20.63135528564453, -78.60333251953125, 147.03512573242188, 20.542709350585938, -4.288238525390625, -61.60795593261719, 159.33767700195312, -26.729957580566406, 32.87236022949219, 243.6298828125, 222.9600830078125, 243.74270629882812, 6.2895660400390625, 83.46463012695312, -150.05947875976562, -114.39501953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000223.npy"}
{"epoch": 0.46701570680628274, "step": 224, "batch_size": 128, "mean": 56.46855926513672, "std": 84.28060150146484, "min": -133.40016174316406, "p10": -52.718041992187494, "median": 44.24079895019531, "p90": 171.49242248535157, "max": 284.87921142578125, "pos_frac": 0.7734375, "sample": [190.23367309570312, 222.01519775390625, 34.065093994140625, 11.323654174804688, -41.39093017578125, 89.359130859375, 147.5592041015625, -1.8662109375, -58.56085205078125, 146.88818359375, 35.95033264160156, -50.3712158203125, 106.096923828125, -21.61494255065918, 127.34646606445312, 81.62265014648438, -7.428466796875, 161.60028076171875, 0.655120849609375, -58.1939697265625, 100.39801025390625, 68.29998779296875, 80.76898193359375, 28.30596923828125, -6.6756134033203125, -110.70046997070312, -89.71969604492188, -8.732582092285156, -21.154077529907227, 50.620849609375, 8.559814453125, 32.56683349609375, 58.43757629394531, -71.62789916992188, -71.50418090820312, 106.03314208984375, 46.42755126953125, 8.25277328491211, 119.68694305419922, 28.314987182617188, 69.09454345703125, -41.98725891113281, 111.94094848632812, 200.81436157226562, 0.0, 169.29312133789062, 170.71551513671875, 6.70536994934082, 32.8406982421875, -25.595016479492188, 176.47119140625, 231.86483764648438, -109.41703796386719, 75.10767364501953, -9.91253662109375, 100.559326171875, 99.80281066894531, 156.4052734375, 80.11029052734375, -47.968475341796875, 107.89199829101562, 64.88128662109375, 91.04632568359375, 54.60084533691406, 37.317779541015625, 54.80287170410156, 176.52615356445312, 143.57305908203125, 7.6181182861328125, 38.241455078125, 16.04969024658203, -20.206939697265625, 12.9981689453125, 18.914310455322266, 87.80354309082031, 43.00152587890625, 6.2353515625, -76.331298828125, 6.347381591796875, 36.0557861328125, 3.88153076171875, 117.8497314453125, 20.17840576171875, -67.31085205078125, 0.8481979370117188, 10.702911376953125, 243.53488159179688, -67.01508331298828, 253.67051696777344, -133.40016174316406, 163.2729949951172, 77.20843505859375, 36.467063903808594, 225.50057983398438, 24.122711181640625, 25.764495849609375, -6.76885986328125, 6.294036865234375, 160.97921752929688, 39.011260986328125, 120.7255859375, 104.30488586425781, 121.11053466796875, -104.50443267822266, 54.23565673828125, 40.83636474609375, 173.30520629882812, 54.43832015991211, 108.73515319824219, 15.197998046875, 37.9007568359375, 207.2913055419922, -14.238861083984375, 169.75686645507812, 50.42657470703125, 151.97662353515625, 284.87921142578125, 78.3055419921875, 26.192970275878906, -58.985626220703125, 54.34355163574219, 176.60906982421875, 52.25714111328125, 45.480072021484375, 56.0003662109375, 114.71768951416016, 27.86804962158203, 117.9556884765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000224.npy"}
{"epoch": 0.46910994764397906, "step": 225, "batch_size": 128, "mean": 50.49940490722656, "std": 94.61904907226562, "min": -205.3035430908203, "p10": -48.24931335449219, "median": 40.30009460449219, "p90": 179.445361328125, "max": 310.29595947265625, "pos_frac": 0.6875, "sample": [310.29595947265625, -32.819580078125, 46.10870361328125, 159.79568481445312, 140.29269409179688, 102.8714599609375, 20.520523071289062, 66.99541473388672, 218.96865844726562, 127.37612915039062, 83.9662094116211, 76.66387939453125, 35.31909942626953, 84.84371948242188, -24.220123291015625, 51.20965576171875, 104.15802001953125, -0.3324432373046875, -62.19482421875, 105.38397216796875, 23.166366577148438, 44.88887023925781, 218.8084716796875, 71.4896240234375, -52.389739990234375, -148.8681640625, -38.33393859863281, 7.072376251220703, 130.28012084960938, 237.95440673828125, 13.18707275390625, 77.65141296386719, 146.80499267578125, 26.728736877441406, -68.08457946777344, -70.15765380859375, -4.97991943359375, -18.723770141601562, -12.213134765625, -45.48185729980469, -14.731597900390625, 6.160980224609375, -24.802032470703125, -23.24181365966797, 39.404998779296875, 97.26385498046875, -10.966949462890625, 108.09957885742188, 45.427452087402344, 174.20947265625, 30.44317626953125, 94.98321533203125, 11.137775421142578, 9.285293579101562, 83.67987060546875, 272.74542236328125, 21.126266479492188, 18.31207275390625, 108.11094665527344, -3.1591033935546875, 86.1783447265625, -39.833953857421875, 49.357025146484375, -28.510833740234375, -24.99505615234375, 138.87191772460938, 19.85955810546875, 185.5838623046875, 66.89212036132812, 246.57415771484375, 15.539932250976562, -47.497802734375, 115.43600463867188, -6.4447021484375, 196.653564453125, 13.646377563476562, -100.96002197265625, -34.95361328125, 146.60098266601562, 112.41839599609375, 92.4837646484375, 171.35194396972656, 10.998779296875, 61.354248046875, -43.53948974609375, 121.11666870117188, 92.5769271850586, -27.551422119140625, 58.382904052734375, -205.3035430908203, 16.215179443359375, -6.36273193359375, 27.468902587890625, -27.14496612548828, 11.4927978515625, 41.1951904296875, 46.16009521484375, -10.77142333984375, 196.61892700195312, 57.42291259765625, -181.36953735351562, 64.23876953125, 134.45753479003906, 34.316864013671875, 63.508056640625, -12.4898681640625, -69.31666564941406, -122.06396484375, 88.15737915039062, 188.32125854492188, 291.06036376953125, 14.76593017578125, 132.3614044189453, 245.48416137695312, -39.102813720703125, 28.25481414794922, 12.041168212890625, 158.745361328125, -50.445159912109375, 87.37258911132812, -109.21099853515625, 55.072998046875, 57.517608642578125, -33.860595703125, -50.002838134765625, 46.898193359375, 176.8145751953125, 260.3236083984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000225.npy"}
{"epoch": 0.4712041884816754, "step": 226, "batch_size": 128, "mean": 46.009002685546875, "std": 87.20616149902344, "min": -158.103271484375, "p10": -51.60983123779297, "median": 26.541894912719727, "p90": 162.4449295043945, "max": 311.5145263671875, "pos_frac": 0.6953125, "sample": [29.73907470703125, 222.1767578125, -45.17401123046875, 104.5424575805664, 71.78424072265625, 154.63906860351562, -4.6165771484375, 73.90399169921875, -12.524078369140625, 14.232223510742188, 149.69961547851562, 82.87384033203125, -138.4046630859375, 131.61126708984375, 164.79331970214844, -71.32135009765625, 121.06166076660156, -24.550556182861328, 42.40453338623047, 311.5145263671875, 21.580047607421875, 15.59539794921875, 67.34364318847656, 49.20976257324219, 9.474380493164062, 52.91131591796875, 129.18923950195312, -92.22180938720703, -1.0178451538085938, 96.37528991699219, 185.6614990234375, -27.636367797851562, 131.302490234375, 229.34771728515625, 24.686599731445312, -49.41269302368164, 59.743812561035156, -158.103271484375, -2.7393798828125, 73.31739044189453, 27.460433959960938, 3.532012939453125, 114.63754272460938, 97.34052276611328, 173.77731323242188, 42.42136764526367, 25.623355865478516, 11.23236083984375, 2.3104190826416016, 22.0740966796875, -19.42181396484375, 15.536739349365234, 132.2526397705078, 67.80859375, 169.07794189453125, -25.2945556640625, 186.3155517578125, 44.885650634765625, 1.2140350341796875, 69.10140991210938, 136.48068237304688, -86.62052917480469, -74.81939697265625, 232.994140625, -17.807327270507812, 131.2596893310547, 115.61723327636719, 143.6193389892578, -50.82063293457031, 2.8035049438476562, 6.662895202636719, -4.812702178955078, 132.73097229003906, 38.3470458984375, -6.438423156738281, -155.4674072265625, 33.38067626953125, 41.54718017578125, -3.269308090209961, -9.17633056640625, 93.13914489746094, 40.816375732421875, -11.93991470336914, -53.4512939453125, 12.0577392578125, 21.949951171875, 275.0047302246094, 12.43804931640625, 66.719970703125, -17.301544189453125, 96.50094604492188, 79.8682861328125, 43.88336181640625, -93.15911865234375, -90.34283447265625, 111.24542236328125, 9.67547607421875, 128.39500427246094, -17.8348388671875, 42.34536361694336, 10.35821533203125, 148.39959716796875, -2.7496185302734375, -30.368629455566406, 159.94345092773438, 119.1260986328125, 2.8150177001953125, 204.22854614257812, 2.80902099609375, 91.53980255126953, 13.671005249023438, -105.83743286132812, 44.93272399902344, 161.4384765625, 11.594779968261719, 176.53094482421875, 19.284332275390625, -59.552276611328125, -66.70420837402344, -19.495513916015625, 169.10342407226562, -6.020294189453125, 108.40969848632812, 7.8466033935546875, -49.46928405761719, -25.615036010742188, 117.54171752929688, -25.708358764648438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000226.npy"}
{"epoch": 0.4732984293193717, "step": 227, "batch_size": 128, "mean": 56.37909698486328, "std": 90.7012939453125, "min": -170.95050048828125, "p10": -45.86067276000975, "median": 48.98701858520508, "p90": 170.21773071289059, "max": 261.6466369628906, "pos_frac": 0.7265625, "sample": [-65.43844604492188, 200.5402069091797, 111.99642944335938, -35.600494384765625, 15.550018310546875, 84.37493896484375, 45.41796875, -38.732177734375, 0.0, 200.90289306640625, -167.51116943359375, -166.3232879638672, -2.8326873779296875, 43.21746826171875, 205.51739501953125, 79.80711364746094, -170.95050048828125, -13.90692138671875, 16.177127838134766, 133.42596435546875, 129.16822814941406, 32.40436935424805, -4.6762237548828125, 139.2093505859375, 211.30130004882812, 90.64360046386719, 55.412479400634766, 204.52757263183594, 182.31289672851562, 40.234046936035156, 33.26544189453125, 119.64027404785156, 103.96604919433594, -25.896480560302734, 212.49237060546875, 132.97317504882812, 8.282806396484375, 1.9284591674804688, 19.758941650390625, 197.427490234375, 93.10931396484375, -118.51739501953125, 133.33529663085938, 151.11085510253906, -19.256805419921875, -38.4383544921875, 108.29385375976562, 156.21099853515625, 0.198883056640625, 148.8536376953125, 72.18585205078125, 24.517471313476562, 43.09332275390625, 163.681884765625, -42.06536102294922, 111.31692504882812, 0.261383056640625, 83.80062866210938, 34.32575988769531, 69.63754272460938, -12.42376708984375, 152.33160400390625, 129.90304565429688, -25.089111328125, -36.095436096191406, -69.31690979003906, 165.03408813476562, 98.132568359375, 164.7518310546875, 239.05776977539062, 58.4923095703125, 88.34295654296875, 184.83743286132812, 191.5133056640625, 126.71478271484375, -113.39424133300781, 33.72247314453125, 142.61941528320312, 112.184326171875, 19.56723403930664, 48.689735412597656, -40.23570251464844, 261.6466369628906, 61.237518310546875, 49.2843017578125, 113.4908447265625, -10.084329605102539, 3.2552490234375, 42.855255126953125, 0.71893310546875, -64.51272583007812, 118.55496215820312, -54.716400146484375, 25.293426513671875, 39.11871337890625, 105.57923126220703, 33.05352783203125, -41.68524169921875, -25.569137573242188, 126.0137939453125, 143.2403564453125, -57.962989807128906, 143.07861328125, -34.1895751953125, 129.32843017578125, -6.678375244140625, -36.4908447265625, 138.8437957763672, 229.41758728027344, -99.32418060302734, 84.47454833984375, 90.10353088378906, 124.8272705078125, 53.57073974609375, -123.314453125, 94.69886779785156, 22.72796630859375, 51.241912841796875, 118.12481689453125, -28.2457275390625, -10.22430419921875, -75.33198547363281, 98.59133911132812, 113.67132568359375, 7.8162689208984375, 4.586360931396484, 36.422996520996094, 24.98236083984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000227.npy"}
{"epoch": 0.47539267015706804, "step": 228, "batch_size": 128, "mean": 60.51764678955078, "std": 86.89804077148438, "min": -159.77239990234375, "p10": -49.20990219116211, "median": 47.99260711669922, "p90": 185.45002136230468, "max": 290.4754333496094, "pos_frac": 0.7578125, "sample": [20.519515991210938, -7.608673095703125, 180.04583740234375, 125.92645263671875, -21.04071044921875, 67.003173828125, 120.6134033203125, 142.11767578125, -17.886077880859375, 210.74032592773438, -44.762298583984375, 73.10464477539062, 81.53909301757812, 117.38572692871094, 67.73291015625, 205.93621826171875, 54.906280517578125, 36.447906494140625, 0.0, 130.11395263671875, 3.65692138671875, 18.918533325195312, 35.716957092285156, 270.1033935546875, -49.89697265625, -41.67375946044922, 47.9473876953125, 69.51580810546875, 7.852630615234375, 106.31258392333984, 146.35240173339844, 205.8978271484375, 16.870681762695312, -54.831398010253906, 192.98828125, 43.55267333984375, 36.73193359375, 48.03782653808594, 42.2374267578125, -23.047698974609375, 142.98745727539062, 55.49434280395508, -48.915443420410156, -31.297195434570312, 111.3651123046875, -16.887266159057617, 195.51904296875, 137.2835235595703, 49.702362060546875, -69.5597915649414, 15.478805541992188, 174.01950073242188, 35.641265869140625, -19.58251953125, -50.583526611328125, 110.9662857055664, 213.4844970703125, 228.4491729736328, -20.659149169921875, 11.670928955078125, 188.8900146484375, 8.48519515991211, 2.0813827514648438, -67.96002197265625, 107.685791015625, 132.6298828125, 19.056743621826172, -56.00333786010742, 121.41546630859375, 29.655242919921875, -45.169708251953125, -56.826171875, 59.8670654296875, 24.9141845703125, 40.40260314941406, 28.05756378173828, 110.7364501953125, 80.11105346679688, 97.29827880859375, 290.4754333496094, 63.26387023925781, 20.65753173828125, -23.507965087890625, 183.97573852539062, 113.04271697998047, 242.66415405273438, 116.35293579101562, -159.77239990234375, 225.63607788085938, 53.549285888671875, 53.763946533203125, 158.0396728515625, 65.35940551757812, 120.1187744140625, 159.095947265625, 87.2934799194336, -72.29644775390625, 131.007568359375, 29.500579833984375, 37.93267822265625, 145.14447021484375, -22.318115234375, -102.4090576171875, 58.707542419433594, -56.497650146484375, 78.15213775634766, 34.86297607421875, 26.186141967773438, -11.382492065429688, 35.38069152832031, -40.81138610839844, -77.41992950439453, -72.111328125, 107.942138671875, 20.665878295898438, 154.34808349609375, 1.45318603515625, -20.580169677734375, 7.9150238037109375, 236.31744384765625, 28.702163696289062, 107.39205932617188, 102.22383117675781, 8.707061767578125, 179.9761962890625, 119.72464752197266, 29.297882080078125, 54.55268478393555], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000228.npy"}
{"epoch": 0.4774869109947644, "step": 229, "batch_size": 128, "mean": 69.79915618896484, "std": 87.63060760498047, "min": -146.23480224609375, "p10": -31.513992309570312, "median": 69.5349006652832, "p90": 182.25275878906248, "max": 270.5589599609375, "pos_frac": 0.8125, "sample": [129.28515625, -11.57183837890625, 153.25216674804688, 133.3114013671875, 187.32867431640625, 133.89910888671875, 47.56488037109375, 130.21533203125, 78.82846069335938, 10.73101806640625, 68.16333770751953, 32.123779296875, 65.88212585449219, 149.63827514648438, 126.36636352539062, 4.904731750488281, 9.046150207519531, -49.24805450439453, 126.67095947265625, 81.77397155761719, 98.07098388671875, 197.171630859375, 34.77874755859375, 70.90646362304688, 164.48092651367188, 153.41616821289062, 117.6485595703125, 201.70843505859375, 77.04949951171875, 93.68319702148438, 85.64453125, 98.76971435546875, -19.6844482421875, -122.4697265625, -17.331802368164062, 37.383697509765625, 122.31768798828125, 31.166259765625, 162.30990600585938, -146.23480224609375, 147.86477661132812, 47.50890350341797, 239.04940795898438, 75.15042114257812, 35.74274444580078, -2.28875732421875, 75.45781707763672, 93.30601501464844, 135.3883514404297, 51.440185546875, 5.3747406005859375, -12.424674987792969, 75.97311401367188, 37.17134094238281, 176.82608032226562, 120.15939331054688, 21.34067153930664, 14.84210205078125, -124.99588012695312, 108.92353820800781, 171.81207275390625, -31.6956787109375, 45.18785095214844, -62.450927734375, 37.0443115234375, -124.47695922851562, 223.03536987304688, 142.57955932617188, 11.29278564453125, -116.38165283203125, 59.25846862792969, 88.69082641601562, 28.4427490234375, 9.572174072265625, 24.129669189453125, 33.63117980957031, 187.017578125, 9.886505126953125, 23.602615356445312, -50.73773193359375, -6.231300354003906, -21.522689819335938, 112.97711181640625, 108.99809265136719, -31.436126708984375, 73.04327392578125, -85.62124633789062, 17.425933837890625, 213.215087890625, 23.279312133789062, 180.210693359375, 84.9583511352539, 270.5589599609375, 13.968231201171875, 102.98724365234375, 120.50979614257812, 125.36990356445312, -68.08103942871094, 235.59552001953125, 156.08624267578125, 19.23419189453125, 257.51513671875, 203.35079956054688, 40.094818115234375, 131.83851623535156, 88.09305572509766, 15.057388305664062, 164.94369506835938, 27.805002212524414, -47.32030487060547, 166.152587890625, 101.5164794921875, 30.952362060546875, 102.09928894042969, 105.52047729492188, 64.422119140625, 33.12266540527344, 251.114013671875, 23.803007125854492, -82.64337158203125, 16.80665397644043, -12.977020263671875, 20.32366943359375, 176.0257568359375, 93.3209228515625, -14.497522354125977, 253.147705078125, -1.0245590209960938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000229.npy"}
{"epoch": 0.47958115183246075, "step": 230, "batch_size": 128, "mean": 60.782344818115234, "std": 96.50050354003906, "min": -272.3831787109375, "p10": -37.216243743896484, "median": 31.72716522216797, "p90": 176.678857421875, "max": 405.8936767578125, "pos_frac": 0.734375, "sample": [150.88507080078125, 64.78445434570312, 130.898681640625, 226.2410888671875, 21.466690063476562, 99.21682739257812, 2.171661376953125, -40.151611328125, 180.44204711914062, -27.803203582763672, -0.30804443359375, 39.8148193359375, 146.5203857421875, 8.14227294921875, -7.5681304931640625, -28.468002319335938, 24.282485961914062, 11.17486572265625, -37.99134826660156, 167.0858154296875, 6.774806976318359, 1.8259735107421875, 19.892480850219727, 399.5414733886719, 73.96000671386719, 122.979248046875, 64.41078186035156, 79.39399719238281, 235.84153747558594, -17.002262115478516, 101.093505859375, 73.40695190429688, 193.900634765625, 103.73675537109375, 136.96640014648438, -7.690338134765625, 147.999755859375, 176.87893676757812, 26.6348876953125, 77.25123596191406, 3.689727783203125, 33.02245330810547, 227.74063110351562, -49.25233459472656, -93.67279052734375, 28.060806274414062, 133.1546630859375, -79.73261260986328, 26.533615112304688, 176.59310913085938, 103.5543212890625, 5.772796630859375, 130.22828674316406, -8.565210342407227, 109.91682434082031, 27.758695602416992, 33.98799133300781, -192.2607421875, 27.065628051757812, -49.044281005859375, 16.63093376159668, 173.53094482421875, 262.349365234375, -8.720901489257812, 150.835205078125, -6.1390380859375, -0.6058349609375, 29.782135009765625, 5.933881759643555, -53.33636474609375, 28.046356201171875, 52.367156982421875, 0.0, 175.54641723632812, -20.85101318359375, 30.43187713623047, 126.79306030273438, -3.501129150390625, 113.37765502929688, 62.382476806640625, -23.826431274414062, -45.86876678466797, 78.2613525390625, -34.86883544921875, 64.1005630493164, 65.431396484375, -10.6785888671875, 405.8936767578125, 179.35720825195312, 115.13092041015625, 214.8455810546875, 67.09150695800781, -47.30150604248047, 89.859619140625, 20.929000854492188, 13.28778076171875, -8.232254028320312, -96.56111907958984, -272.3831787109375, 200.4657745361328, 24.32601547241211, 135.0491943359375, 12.326301574707031, 41.48736572265625, -5.229217529296875, 15.515419006347656, 139.4993896484375, 143.66151428222656, -40.99229049682617, 70.8214111328125, 157.94207763671875, 111.01702880859375, -1.18914794921875, 18.64585304260254, 150.19354248046875, 160.69015502929688, 55.97283935546875, 12.53004264831543, 120.27937316894531, 220.34527587890625, -36.884056091308594, 29.82977294921875, 144.45864868164062, 113.137451171875, 29.11468505859375, 109.39227294921875, 13.3853759765625, -18.158462524414062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000230.npy"}
{"epoch": 0.4816753926701571, "step": 231, "batch_size": 128, "mean": 47.92451858520508, "std": 83.47175598144531, "min": -179.55145263671875, "p10": -55.97111206054687, "median": 44.963043212890625, "p90": 160.1705139160156, "max": 279.3790283203125, "pos_frac": 0.75, "sample": [35.23750305175781, 55.532470703125, -16.37347412109375, -93.3825454711914, 40.8458251953125, -26.066486358642578, 30.04669189453125, 12.183906555175781, 3.780364990234375, 24.175094604492188, 18.184066772460938, 36.14581298828125, -22.8131103515625, 64.4183349609375, 106.03680419921875, 94.2296142578125, -27.822139739990234, 41.327606201171875, 100.86343383789062, 19.659210205078125, 59.47218322753906, 31.35888671875, 259.75726318359375, -0.05277824401855469, 38.15203857421875, 75.90121459960938, 44.59968566894531, -7.390289306640625, 119.44560241699219, -87.61444091796875, 100.45449829101562, 192.52584838867188, 117.08384704589844, 142.53993225097656, 70.9404296875, -26.692123413085938, 279.3790283203125, 205.92974853515625, -54.169158935546875, 134.6934814453125, -128.57797241210938, -75.36355590820312, -20.87359619140625, 53.37677001953125, 61.92268371582031, -55.328521728515625, 86.14642333984375, 29.486602783203125, 61.79351806640625, 62.19703674316406, 84.81547546386719, 106.44577026367188, -72.2928466796875, 47.2158203125, 32.39445495605469, 186.541748046875, 21.49374771118164, 20.695281982421875, -19.36578369140625, 127.5469970703125, 62.40643310546875, 101.54977416992188, 2.367034912109375, 25.75372314453125, -17.18292236328125, -94.63663482666016, 166.64218139648438, -53.36981201171875, 157.39694213867188, 79.24603271484375, 125.29171752929688, 8.981124877929688, 37.03355407714844, 98.34764099121094, -3.7308273315429688, 72.81161499023438, 150.34552001953125, 92.88702392578125, 29.80364227294922, 208.77560424804688, 191.49761962890625, -125.95498657226562, -179.55145263671875, 26.44268798828125, 22.354598999023438, 9.887420654296875, 64.01618957519531, 179.0875244140625, 78.99459838867188, 129.09786987304688, 85.28878784179688, 79.15264892578125, -139.89434814453125, 88.63107299804688, 215.50930786132812, -96.5350341796875, 104.87069702148438, 84.7882080078125, 24.694839477539062, 66.09825134277344, 189.44635009765625, 57.935760498046875, -39.8955078125, 50.98991394042969, 16.03204345703125, -71.40933227539062, 178.39593505859375, 127.20993041992188, -89.53865051269531, 67.92848205566406, 1.5107421875, 15.593887329101562, 12.387706756591797, -16.989166259765625, 121.2945556640625, 56.510498046875, 58.93743896484375, 45.32640075683594, -57.470489501953125, 25.620941162109375, -51.60340881347656, 120.15434265136719, -40.9677734375, 137.7326202392578, 175.59603881835938, 0.0, 13.079864501953125, 68.53936767578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000231.npy"}
{"epoch": 0.4837696335078534, "step": 232, "batch_size": 128, "mean": 51.095001220703125, "std": 91.87593841552734, "min": -196.53018188476562, "p10": -46.97623519897461, "median": 45.265018463134766, "p90": 171.45148010253905, "max": 317.404052734375, "pos_frac": 0.7265625, "sample": [-161.62200927734375, 70.26846313476562, -8.161239624023438, 91.6033935546875, -74.3770751953125, 104.065673828125, 317.404052734375, 142.76324462890625, -4.0988922119140625, -72.98078155517578, 17.81610107421875, 64.70406341552734, 133.75279235839844, 0.3375358581542969, -8.12567138671875, -45.86358642578125, 96.02696228027344, 215.49913024902344, -3.613037109375, 74.768310546875, 41.63972473144531, 51.209739685058594, 50.160003662109375, 88.922607421875, 2.078481674194336, 11.5269775390625, -54.66009521484375, 19.706268310546875, -46.633216857910156, -102.49554443359375, -45.478179931640625, -35.710235595703125, 47.13520812988281, 103.7218017578125, 48.301422119140625, 75.61065673828125, -38.424652099609375, 51.625701904296875, 77.58474731445312, 186.3198699951172, 64.47021484375, 43.39482879638672, -196.53018188476562, 21.177764892578125, 166.3392333984375, 27.87261962890625, 82.41326141357422, 112.12557983398438, -2.5689544677734375, 66.5584716796875, 54.01805877685547, 152.80838012695312, 123.51696014404297, 147.62332153320312, 176.8697509765625, -99.07034301757812, 76.01177978515625, -33.110965728759766, 16.950292587280273, 141.99554443359375, 192.17221069335938, -21.409881591796875, 71.4580078125, -109.384765625, 22.3663330078125, 88.9881591796875, 86.34480285644531, 232.29522705078125, -22.95501708984375, 17.75727081298828, 0.38733482360839844, -56.03404235839844, 222.01220703125, 177.67431640625, 75.79298400878906, 36.998504638671875, 131.18917846679688, 192.6943817138672, 4.1218719482421875, 131.70230102539062, 5.779693603515625, -161.1806640625, 211.81224060058594, 169.12936401367188, 202.93304443359375, 183.9176025390625, 40.6580810546875, -166.01654052734375, -14.32769775390625, -47.776611328125, 164.24600219726562, 15.56280517578125, 154.71771240234375, 118.86474609375, -123.57295227050781, 153.90826416015625, 69.44027709960938, -25.422698974609375, -21.81781005859375, 16.768630981445312, 2.543121337890625, 7.093040466308594, 1.2791824340820312, 49.86431884765625, -29.727153778076172, 156.5421142578125, 28.78558349609375, 20.025985717773438, 61.307212829589844, 250.039306640625, 125.44558715820312, -18.84539794921875, -0.4266510009765625, 20.05657958984375, 6.047088623046875, 63.1820068359375, 149.0791015625, 86.23724365234375, 16.674285888671875, 161.52432250976562, 0.0, 139.14801025390625, 24.595298767089844, -5.792724609375, 115.54466247558594, 81.02999877929688, 6.08673095703125, -24.14813232421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000232.npy"}
{"epoch": 0.48586387434554973, "step": 233, "batch_size": 128, "mean": 60.47126770019531, "std": 88.28166961669922, "min": -189.7015380859375, "p10": -33.86321868896484, "median": 44.00528335571289, "p90": 173.7095809936523, "max": 304.4705810546875, "pos_frac": 0.75, "sample": [-121.85074615478516, 124.8581771850586, 39.982269287109375, -0.987060546875, 198.7286376953125, -1.9494476318359375, 86.57131958007812, 63.32135009765625, 77.91851806640625, 304.4705810546875, 124.29124450683594, 170.28305053710938, -34.396240234375, 108.05243682861328, 301.3912658691406, 134.87010192871094, 133.52206420898438, -40.308685302734375, -2.471832275390625, 75.92674255371094, 11.772706985473633, 93.66609191894531, -27.33636474609375, 181.70481872558594, 30.105300903320312, -60.24540710449219, 10.57135009765625, 146.81590270996094, 223.38644409179688, 30.195053100585938, 83.06851196289062, 102.43260192871094, -8.624465942382812, 153.0147705078125, -16.037139892578125, 67.12139892578125, 24.3551025390625, -14.258926391601562, 100.99751281738281, 243.02853393554688, 208.21469116210938, 154.12173461914062, 155.36724853515625, 135.774658203125, 104.83724975585938, 39.26428985595703, 47.0147705078125, -2.12847900390625, -77.03067016601562, -56.32330322265625, 67.96340942382812, 186.17913818359375, -1.79351806640625, -10.993770599365234, -31.03368377685547, -162.55377197265625, -33.63478088378906, 92.75302124023438, 197.64898681640625, 21.864471435546875, 6.361904144287109, 128.20123291015625, 33.81451416015625, 74.12893676757812, 23.018720626831055, 19.802139282226562, 43.967132568359375, 136.2299041748047, -54.76737976074219, -1.2299137115478516, 29.68832778930664, 30.06103515625, 26.57122039794922, 98.64927673339844, 146.06954956054688, 125.6568603515625, 202.06475830078125, -21.6865234375, 31.11627197265625, 8.35113525390625, 108.043212890625, 120.19184875488281, 242.587646484375, 122.03640747070312, 222.29953002929688, 86.91220092773438, 42.03569030761719, 166.7937469482422, 10.348709106445312, 141.29544067382812, -21.73468780517578, 26.212005615234375, 105.21281433105469, 3.3660926818847656, -30.831298828125, 82.2171630859375, 135.7781982421875, 16.750030517578125, 27.672958374023438, 48.6298828125, -11.96331787109375, 195.63104248046875, 68.7396240234375, 44.043434143066406, 7.069183349609375, -108.8921890258789, 80.07318115234375, -65.67327880859375, 85.98419189453125, 123.0413818359375, -99.05514526367188, 104.00730895996094, 42.04718017578125, 28.56329345703125, 24.283992767333984, -50.947113037109375, 60.6768798828125, 13.119491577148438, 15.485572814941406, 103.96392822265625, -189.7015380859375, 147.21847534179688, -8.941818237304688, -15.46087646484375, 159.27374267578125, 66.16287231445312, 3.80755615234375, 22.40936279296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000233.npy"}
{"epoch": 0.48795811518324606, "step": 234, "batch_size": 128, "mean": 55.17009735107422, "std": 95.20023345947266, "min": -169.12908935546875, "p10": -77.63809661865234, "median": 51.30726623535156, "p90": 176.17608489990235, "max": 309.09271240234375, "pos_frac": 0.6796875, "sample": [-56.552581787109375, -98.930908203125, -20.603057861328125, 105.94133758544922, 91.31988525390625, 176.55316162109375, 171.8175048828125, -86.5479965209961, 102.97966003417969, -13.93701171875, 24.76605224609375, 144.34829711914062, -3.6778793334960938, 47.926185607910156, 9.837043762207031, 34.13482666015625, -2.530496597290039, 32.99053955078125, -3.321563720703125, 0.0, 137.54824829101562, 109.4130859375, -15.266555786132812, -6.943359375, -3.0377349853515625, 176.0144805908203, 125.7669677734375, -32.15399169921875, -131.3747100830078, 34.599365234375, 123.218994140625, 123.96683502197266, 165.76303100585938, 65.35726928710938, 187.6915283203125, 59.454376220703125, 42.1480712890625, 48.817474365234375, -40.55963134765625, -11.05633544921875, 76.73088073730469, -0.383697509765625, 81.93292236328125, 121.39608764648438, 160.34622192382812, 135.41343688964844, -4.7962188720703125, 219.78961181640625, 64.9349365234375, -0.0852508544921875, 37.041343688964844, 95.22256469726562, 141.4855194091797, 0.0, 65.1021728515625, 4.799530029296875, 144.61215209960938, 37.381744384765625, 146.4123077392578, 55.37615966796875, -38.942413330078125, -39.2093505859375, -111.97653198242188, 35.4849853515625, -75.58859252929688, 235.546142578125, 137.14407348632812, -169.12908935546875, 115.7222900390625, 11.163848876953125, -9.163299560546875, -82.42027282714844, 35.52935791015625, -109.72920989990234, 166.8108367919922, 121.59483337402344, 179.17991638183594, 65.28047943115234, 125.97427368164062, 7.46429443359375, -29.430419921875, 25.080368041992188, 170.77142333984375, 221.51425170898438, 309.09271240234375, 229.72227478027344, -82.69195556640625, 41.5152587890625, -152.43310546875, 155.08843994140625, -67.52288818359375, 189.51187133789062, 93.32534790039062, -9.84698486328125, -103.68109130859375, 128.33717346191406, 179.92111206054688, 129.15843200683594, -91.3372802734375, 97.131103515625, 72.807373046875, -16.558029174804688, 11.545700073242188, 128.7696533203125, 36.083412170410156, 12.82171630859375, 13.185043334960938, 53.79705810546875, -70.70176696777344, 34.79657745361328, 201.3975830078125, 60.554473876953125, 227.82119750976562, 133.11474609375, -121.03239440917969, 90.50680541992188, 169.84042358398438, 96.052490234375, 110.3072738647461, -42.68511962890625, 37.23077392578125, 67.6573486328125, 65.3172607421875, -95.25932312011719, -71.76644897460938, 225.26589965820312, 55.9759521484375, 142.3714599609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000234.npy"}
{"epoch": 0.4900523560209424, "step": 235, "batch_size": 128, "mean": 58.2130012512207, "std": 88.18122863769531, "min": -159.91552734375, "p10": -34.61411209106444, "median": 49.11591720581055, "p90": 171.5685302734375, "max": 277.31475830078125, "pos_frac": 0.7421875, "sample": [26.47955322265625, 15.648368835449219, -17.93377685546875, 110.68466186523438, 115.52029418945312, 199.72042846679688, 48.4215087890625, 157.0978240966797, 135.63726806640625, 140.60574340820312, 8.387033462524414, -26.722198486328125, -81.91226196289062, 93.98240661621094, 186.09857177734375, 88.5225830078125, 207.62844848632812, 21.304988861083984, -10.10589599609375, -134.53846740722656, 42.14579772949219, 28.758544921875, 136.10838317871094, 8.338407516479492, 129.76243591308594, 7.583160400390625, -134.0250244140625, 30.979820251464844, -22.16303253173828, -130.62664794921875, 29.564132690429688, 138.8533935546875, 17.333751678466797, -100.69757080078125, -18.51925277709961, -102.24087524414062, 277.31475830078125, 133.63796997070312, 32.0694694519043, 106.94049072265625, 111.34941101074219, 62.87335205078125, 135.53372192382812, 128.30596923828125, 6.440210342407227, -11.614593505859375, 152.8883056640625, 63.898590087890625, -3.8142242431640625, 131.0870361328125, 49.810325622558594, 93.73779296875, -26.58891487121582, 178.03189086914062, -14.213415145874023, -31.43719482421875, 35.84849548339844, -16.17853546142578, 157.0437774658203, 171.60189819335938, 133.58749389648438, -159.91552734375, -16.06757354736328, -30.345985412597656, 154.60792541503906, 61.3961181640625, 16.430030822753906, 234.02505493164062, -31.54486846923828, 110.48904418945312, 4.293769836425781, -64.28863525390625, 190.90232849121094, 120.53158569335938, 103.04373168945312, -65.79464721679688, -19.353927612304688, 128.68138122558594, -73.20623779296875, 16.325408935546875, -0.482635498046875, 44.973663330078125, 15.66904067993164, 15.24896240234375, 144.5380859375, 86.94094848632812, 13.673751831054688, 59.510467529296875, 70.27362060546875, 4.8582000732421875, 201.62252807617188, 59.530731201171875, 55.24462890625, 130.84486389160156, 43.583282470703125, 205.2010955810547, 167.98773193359375, 68.27255249023438, 3.5019378662109375, 110.01055145263672, 179.830322265625, 141.4454345703125, -141.80096435546875, -13.934585571289062, 130.52769470214844, -45.08073425292969, 101.5953369140625, -2.82452392578125, -1.9462776184082031, 36.58551025390625, 142.41587829589844, -14.673233032226562, 1.785308837890625, 262.8153076171875, 33.1998291015625, 84.74566650390625, 101.3157958984375, 5.08685302734375, 33.519927978515625, 45.237083435058594, 117.43328094482422, 200.9478759765625, 91.60107421875, 117.96139526367188, 171.55422973632812, 58.63322448730469, 70.01382446289062, -41.77568054199219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000235.npy"}
{"epoch": 0.49214659685863876, "step": 236, "batch_size": 128, "mean": 53.632568359375, "std": 90.95767974853516, "min": -182.0224609375, "p10": -44.82255249023437, "median": 47.278594970703125, "p90": 185.96146240234376, "max": 256.05828857421875, "pos_frac": 0.75, "sample": [38.213043212890625, 26.66949462890625, -93.61801147460938, 32.212745666503906, 71.08934783935547, 12.496063232421875, 18.921432495117188, 66.07095336914062, -2.5262832641601562, 139.35618591308594, 80.69247436523438, 83.18829345703125, 7.459747314453125, 0.0, 7.48626708984375, 11.419181823730469, -36.84336853027344, -40.3292236328125, 120.31005859375, -75.72314453125, 112.52841186523438, 52.795501708984375, 132.078369140625, 147.76968383789062, 80.55130004882812, 82.00482177734375, -36.8375244140625, 114.87055969238281, -33.53650665283203, 68.9977798461914, 165.89505004882812, 193.482666015625, 52.06053161621094, 21.28094482421875, 14.868270874023438, 207.84817504882812, -131.10186767578125, 161.38311767578125, 242.06907653808594, -2.6721954345703125, 45.3228759765625, 33.18696594238281, 49.23431396484375, 58.250396728515625, 103.33740234375, 167.28492736816406, 14.55207633972168, -44.113922119140625, 65.99905395507812, 14.744560241699219, -46.2076416015625, 108.7447509765625, 127.63949584960938, 49.8221435546875, -26.802352905273438, 3.857452392578125, 96.00912475585938, 85.450927734375, 66.11976623535156, 120.944091796875, 256.05828857421875, 148.75668334960938, 1.8170814514160156, -90.5078125, -129.34207153320312, -34.13941955566406, -84.1905517578125, 77.76814270019531, 153.76548767089844, 82.6708755493164, 161.1260986328125, 32.773193359375, -23.015426635742188, 214.5924072265625, 29.572525024414062, 27.29482650756836, -182.0224609375, 190.16259765625, 25.97210693359375, -26.6932373046875, 51.25334930419922, 170.682861328125, -26.66461944580078, 14.472023010253906, 0.0, 57.890472412109375, -11.380889892578125, 185.13424682617188, 27.54505157470703, 31.581024169921875, 83.61245727539062, 101.57684326171875, 238.26504516601562, 54.37566375732422, 219.19253540039062, 11.77685546875, -148.53579711914062, 145.78158569335938, 83.5950927734375, 165.98406982421875, 107.4654541015625, -5.369274139404297, -103.35725402832031, -1.03570556640625, 17.597930908203125, 221.523193359375, 61.414794921875, 30.42572021484375, 37.42832946777344, 55.507598876953125, 68.50787353515625, 234.88095092773438, -90.03662109375, -14.380859375, 188.3017578125, 3.827669143676758, -146.93504333496094, 215.43911743164062, 40.01957702636719, -78.86463165283203, 119.91122436523438, 8.5501708984375, 18.3819580078125, 187.89163208007812, -44.22894287109375, 167.6463623046875, 0.18131256103515625, 69.45944213867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000236.npy"}
{"epoch": 0.4942408376963351, "step": 237, "batch_size": 128, "mean": 55.02622985839844, "std": 83.30554962158203, "min": -193.60476684570312, "p10": -31.073402404785153, "median": 38.443450927734375, "p90": 167.97518920898438, "max": 274.3171691894531, "pos_frac": 0.7421875, "sample": [127.8919677734375, 22.413040161132812, 36.877777099609375, 208.33953857421875, -95.24456787109375, 31.263870239257812, -2.9613189697265625, 46.80426025390625, 78.56307983398438, -18.264801025390625, 274.3171691894531, 46.66099548339844, 88.4781494140625, -3.6293067932128906, 42.73252868652344, 169.59359741210938, 70.64762878417969, 12.670135498046875, 124.6112060546875, 148.81134033203125, -8.970947265625, 14.912506103515625, 98.87557983398438, 33.67710876464844, 65.84442138671875, 42.437957763671875, 227.71072387695312, -28.404327392578125, -30.016632080078125, 40.009124755859375, 1.1655120849609375, 79.9620361328125, -50.033050537109375, 77.60991668701172, 29.561203002929688, 83.12713623046875, -39.560150146484375, 14.348541259765625, -97.12570190429688, -33.53919982910156, 79.21937561035156, 111.59376525878906, 167.28158569335938, 156.36839294433594, 81.15298461914062, -3.8261566162109375, 109.54629516601562, 165.03677368164062, -2.58642578125, -80.43426513671875, 1.9704132080078125, 174.28219604492188, 22.5582275390625, -23.116065979003906, 152.7540283203125, 5.397373199462891, 156.2028045654297, 0.874420166015625, 1.8476181030273438, -18.39178466796875, -193.60476684570312, 71.0224609375, 181.2574462890625, 147.82012939453125, 111.14495849609375, 104.60617065429688, -13.610565185546875, 50.16880798339844, 21.065841674804688, -7.67535400390625, 30.162864685058594, 107.91253662109375, -47.11032485961914, 28.37664794921875, 81.64277648925781, 0.0, 154.60809326171875, 145.85061645507812, 9.684463500976562, 40.353668212890625, -19.881317138671875, 85.67645263671875, 185.60418701171875, 79.21490478515625, 26.388935089111328, 45.514617919921875, 89.21923828125, -69.06819152832031, 269.8857727050781, 13.21600341796875, 27.133880615234375, 192.5689697265625, 125.27072143554688, -16.977386474609375, 0.2002105712890625, 131.72840881347656, 138.83502197265625, -15.155929565429688, -10.70135498046875, -29.378021240234375, 29.2335205078125, -37.66552734375, 208.67929077148438, 34.7763671875, 118.86813354492188, 28.300262451171875, 67.66149139404297, -16.513595581054688, 202.82159423828125, 43.81269073486328, 31.544158935546875, 36.66766357421875, -14.282577514648438, 11.983489990234375, 210.24937438964844, 34.6766357421875, -51.658447265625, 28.142578125, 42.3013916015625, 230.8189697265625, -98.79319763183594, 9.962921142578125, -113.8492431640625, 111.2281494140625, 103.93461608886719, 159.7201690673828, 47.248748779296875, 120.6162109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000237.npy"}
{"epoch": 0.4963350785340314, "step": 238, "batch_size": 128, "mean": 56.16621780395508, "std": 87.56217956542969, "min": -118.29547119140625, "p10": -38.823492431640624, "median": 42.32478332519531, "p90": 176.28565979003903, "max": 312.2730712890625, "pos_frac": 0.734375, "sample": [42.973358154296875, 28.0286865234375, -7.782958984375, 22.41114044189453, 295.215576171875, -36.531982421875, 7.8837890625, 90.89697265625, 186.4385986328125, 213.5792694091797, -91.35472869873047, 109.78974151611328, 82.50140380859375, 74.21670532226562, 277.83746337890625, 200.6156005859375, 179.83169555664062, 59.84254455566406, 32.83570861816406, 151.69456481933594, 41.63885498046875, 44.694252014160156, 41.35863494873047, -28.019676208496094, 74.98892211914062, 235.69729614257812, 12.02669906616211, -15.847251892089844, -18.565174102783203, 24.314544677734375, 83.4820556640625, -6.4949798583984375, 48.29991149902344, 201.02798461914062, 105.42903137207031, 127.06069946289062, 38.540679931640625, 112.36285400390625, 9.763206481933594, 100.4605712890625, 167.20217895507812, 146.51123046875, 46.9998779296875, 142.03341674804688, -12.57574462890625, 2.36944580078125, 103.01174926757812, 20.862945556640625, 120.43508911132812, 73.38404083251953, -84.10687255859375, -82.90478515625, 23.767807006835938, 190.15667724609375, 159.5927734375, 16.5040283203125, -0.7808837890625, 41.67620849609375, 83.86286163330078, 4.251323699951172, 79.97830200195312, 149.82086181640625, -9.213706970214844, 0.032623291015625, 48.0579833984375, -106.521728515625, 0.896881103515625, 145.86744689941406, 131.6976318359375, 51.52325439453125, 55.8001708984375, 308.09881591796875, 131.70242309570312, 127.69861602783203, -53.81256103515625, 46.73619079589844, 24.899017333984375, -15.341424942016602, 55.829010009765625, 129.98451232910156, -19.690757751464844, 30.33026123046875, 71.60130310058594, 60.82379150390625, -7.700359344482422, 3.5784149169921875, -54.75746154785156, 187.30938720703125, 13.949230194091797, 48.58148193359375, 10.158454895019531, -14.094451904296875, 91.86776733398438, 312.2730712890625, 58.65679931640625, -38.093780517578125, 1.7802734375, 3.65570068359375, 76.51420593261719, -118.29547119140625, 36.5999755859375, 140.92544555664062, 97.15396118164062, 73.86370849609375, 27.11822509765625, -9.083709716796875, -45.25764465332031, -106.1519546508789, 2.0412979125976562, 26.009033203125, -48.716705322265625, -78.12384033203125, 174.76593017578125, -68.01071166992188, 73.82914733886719, 52.1572265625, -31.71307373046875, -11.06884765625, -10.75177001953125, 121.67730712890625, -40.526153564453125, 101.948486328125, 233.66311645507812, -19.58990478515625, -3.9086570739746094, -2.51434326171875, 165.26617431640625, 0.0963134765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000238.npy"}
{"epoch": 0.49842931937172774, "step": 239, "batch_size": 128, "mean": 47.45079040527344, "std": 100.31330108642578, "min": -308.69122314453125, "p10": -58.73486175537109, "median": 28.937599182128906, "p90": 186.43423156738282, "max": 313.48565673828125, "pos_frac": 0.7109375, "sample": [-58.858367919921875, -58.68193054199219, 242.39083862304688, 7.399200439453125, 143.93804931640625, -10.58905029296875, 231.704833984375, -45.68505859375, 177.16522216796875, 204.77069091796875, 30.640274047851562, -29.669281005859375, -77.47610473632812, -13.18365478515625, -101.53927612304688, 56.14599609375, 118.5318832397461, 43.4615478515625, 130.49346923828125, -48.90704345703125, 92.52394104003906, 25.115386962890625, 108.45465087890625, 68.31198120117188, -3.7686767578125, -46.26457977294922, 269.4128112792969, -24.518463134765625, 118.25688171386719, 242.21810913085938, 145.81686401367188, 0.0, -21.264739990234375, 10.759490966796875, -201.669677734375, 49.513999938964844, 34.204498291015625, 186.6630859375, 234.0076904296875, 313.48565673828125, 17.079132080078125, 287.9840087890625, 7.301544189453125, 83.61688232421875, -62.3402099609375, -104.59689331054688, 55.350494384765625, 23.0162353515625, 0.2939453125, 183.78616333007812, -1.7901687622070312, 25.359657287597656, -84.29425811767578, 218.79421997070312, 39.11334228515625, 58.19224548339844, 32.390899658203125, 211.99896240234375, -113.85708618164062, 33.10718536376953, 36.34718322753906, 1.5661773681640625, 170.33644104003906, -5.95513916015625, 117.52906799316406, -14.059196472167969, 66.04803466796875, -308.69122314453125, 139.888427734375, -116.09503173828125, 95.72007751464844, -33.40013122558594, -25.349517822265625, 136.4532470703125, 5.965049743652344, 69.83586120605469, 48.281463623046875, 11.993789672851562, 5.031181335449219, -121.85000610351562, 102.30661010742188, 90.221923828125, 64.74490356445312, 1.75, 84.6932373046875, 8.825469970703125, -70.25350952148438, 55.00885009765625, 23.180618286132812, -42.18206787109375, 134.55841064453125, -3.3000411987304688, 3.65771484375, 13.000045776367188, 65.8709716796875, 102.122802734375, -31.01409912109375, -11.35064697265625, 170.802734375, 54.715087890625, 181.48214721679688, -41.996246337890625, -25.7000732421875, 86.4862060546875, 84.784912109375, 15.567657470703125, -26.19012451171875, 186.33615112304688, 27.23492431640625, 151.6788330078125, 9.389204025268555, 95.34895324707031, 66.06927490234375, 7.47412109375, 8.785064697265625, -94.20677185058594, 82.71026611328125, 274.01019287109375, 227.80197143554688, 77.03933715820312, 24.47905731201172, 48.08935546875, 35.967498779296875, 16.346927642822266, 20.19171142578125, -28.602249145507812, 0.6534309387207031, 11.697357177734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000239.npy"}
{"epoch": 0.5005235602094241, "step": 240, "batch_size": 128, "mean": 63.445770263671875, "std": 90.7201919555664, "min": -146.8692626953125, "p10": -36.65381164550781, "median": 52.483306884765625, "p90": 189.4952667236328, "max": 410.0437316894531, "pos_frac": 0.75, "sample": [24.25942611694336, 20.726715087890625, 50.4376220703125, 121.43907165527344, 410.0437316894531, 43.64802551269531, -68.75189208984375, 2.3853759765625, 145.50949096679688, 116.64463806152344, 96.2725830078125, 39.51118469238281, 153.8643035888672, 78.71614074707031, 138.16915893554688, 63.026458740234375, 232.32574462890625, 183.05116271972656, 52.7353515625, 34.716705322265625, -71.64295959472656, 134.41578674316406, 34.57322692871094, 93.06643676757812, -38.48579406738281, 40.6103515625, 117.04104614257812, 53.985931396484375, 38.2451171875, -86.39531707763672, 63.27375793457031, -7.1252288818359375, -54.949134826660156, 59.86385726928711, 92.0072021484375, 64.5494384765625, 215.30426025390625, 105.89976501464844, 120.34021759033203, 22.905967712402344, -3.830718994140625, 69.79046630859375, 120.91937255859375, 37.4171142578125, 0.0, 8.627311706542969, -145.52780151367188, 38.160552978515625, 164.43896484375, 28.944778442382812, 156.8521728515625, -36.25897216796875, 10.51715087890625, 26.443601608276367, 9.670211791992188, -30.342788696289062, 86.84927368164062, 173.61465454101562, -21.982666015625, 212.46609497070312, -137.238037109375, 119.03512573242188, -24.88800048828125, 212.8310546875, 0.0, 149.67062377929688, 60.97479248046875, -146.8692626953125, 116.57992553710938, 88.19863891601562, 232.76089477539062, -22.473770141601562, 1.455047607421875, 52.8603515625, 131.64724731445312, -42.80908203125, 11.9156494140625, 86.68844604492188, 18.162078857421875, -12.28411865234375, 0.0, 39.69573974609375, -14.463272094726562, -19.850616455078125, 220.54840087890625, 44.8104248046875, 143.5457305908203, 157.2847900390625, -37.575103759765625, 227.285888671875, 14.845489501953125, 56.15997314453125, 0.0, 84.04579162597656, 111.67660522460938, 20.7362060546875, -62.63818359375, -6.857421875, -48.476707458496094, 123.94705963134766, 78.25213623046875, 69.93942260742188, 188.43295288085938, 53.31156921386719, 249.197021484375, 213.109619140625, 76.49148559570312, 3.40771484375, 85.58605194091797, 198.69143676757812, -21.552154541015625, -31.29119873046875, 252.3311767578125, -11.360015869140625, 14.190353393554688, 80.48516845703125, 142.11187744140625, 29.06976318359375, 157.08045959472656, 16.338165283203125, 6.473052978515625, 110.35322570800781, 179.89364624023438, -5.595367431640625, 15.51983642578125, -67.53681945800781, 52.23126220703125, 191.9739990234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000240.npy"}
{"epoch": 0.5026178010471204, "step": 241, "batch_size": 128, "mean": 45.166656494140625, "std": 90.44316101074219, "min": -178.3702392578125, "p10": -73.38340911865234, "median": 32.305206298828125, "p90": 161.08339233398436, "max": 279.43939208984375, "pos_frac": 0.6953125, "sample": [17.564453125, 60.490325927734375, 136.7534637451172, 46.123565673828125, 224.80467224121094, -29.9085693359375, 43.961578369140625, -8.937225341796875, 41.9195556640625, 11.16229248046875, 18.72796630859375, 157.13479614257812, 67.83834838867188, 44.9161376953125, 148.36924743652344, 9.305212020874023, -10.545188903808594, 173.43881225585938, 0.67132568359375, 60.032958984375, -27.158981323242188, -77.2755126953125, 190.21170043945312, -0.817901611328125, 20.716217041015625, 36.42873764038086, 206.52398681640625, 80.43194580078125, -37.96174621582031, -8.629844665527344, 143.65721130371094, -3.152250289916992, 3.4885177612304688, -75.41069030761719, 39.881683349609375, -4.008550643920898, -16.892608642578125, -39.25421142578125, 15.228626251220703, 67.0029296875, 32.94580078125, 139.08116149902344, -73.46476745605469, 25.852294921875, 9.770126342773438, 89.6998291015625, -148.73341369628906, 4.993499755859375, 65.3615951538086, 149.8084716796875, -76.516357421875, 10.799560546875, 87.82144165039062, -91.3663330078125, 42.928680419921875, 22.000885009765625, 42.62384033203125, -6.66717529296875, -55.11927795410156, 36.138702392578125, 130.20089721679688, 42.39503479003906, 18.65416717529297, 161.69866943359375, 10.520309448242188, 116.50155639648438, 263.7959289550781, 0.0, -62.31800079345703, 72.48146057128906, 37.13056945800781, 133.712158203125, 131.82667541503906, 119.97879028320312, -123.24798583984375, -4.0259857177734375, 35.01548767089844, 16.789993286132812, -82.55029296875, -11.88446044921875, 279.43939208984375, 117.85861206054688, 10.869140625, -178.3702392578125, 275.09417724609375, 109.97116088867188, 55.83777618408203, 17.504058837890625, 35.170318603515625, 118.63339233398438, 13.182464599609375, -90.23361206054688, 185.04043579101562, 50.289642333984375, 30.310516357421875, -78.31539916992188, -21.270172119140625, 198.3636932373047, 86.27152252197266, 277.948974609375, 23.10968017578125, 140.15159606933594, 90.54400634765625, -123.60006713867188, 135.02963256835938, 31.66461181640625, 144.0547332763672, -73.34854125976562, -12.7677001953125, 120.76312255859375, 160.8197021484375, 58.929656982421875, 221.36026000976562, 123.41635131835938, -32.167205810546875, -3.667938232421875, 95.6864013671875, 164.13905334472656, -1.26055908203125, -132.44061279296875, 19.874755859375, -51.33534240722656, 121.49880981445312, 61.37176513671875, 23.14715576171875, -33.625762939453125, 16.32025146484375, -41.39825439453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000241.npy"}
{"epoch": 0.5047120418848168, "step": 242, "batch_size": 128, "mean": 53.70854949951172, "std": 85.33418273925781, "min": -136.04144287109375, "p10": -44.21248016357422, "median": 40.7525634765625, "p90": 168.883576965332, "max": 355.11572265625, "pos_frac": 0.7265625, "sample": [103.43243408203125, 21.146324157714844, 78.68280029296875, 19.760379791259766, -13.8411865234375, 179.9193115234375, 33.922271728515625, 202.95156860351562, 113.13484191894531, 165.59373474121094, -60.63859558105469, 107.98381805419922, 159.23348999023438, -43.75212097167969, -26.881484985351562, -59.997314453125, -6.37359619140625, -10.753715515136719, 183.97607421875, 341.8849182128906, 60.61164855957031, 143.71054077148438, 40.669158935546875, -12.56787109375, 51.129974365234375, 123.89286804199219, 236.2901611328125, 74.34281158447266, 64.1107177734375, 96.66671752929688, 38.15716552734375, 116.411865234375, 36.66456604003906, -28.91455841064453, 75.73550415039062, 99.52656555175781, 355.11572265625, -8.25927734375, -47.86585998535156, 67.53398895263672, 8.912904739379883, 166.2725067138672, 244.82736206054688, 83.98009490966797, -32.13787841796875, 44.77113342285156, -37.877777099609375, 7.528163909912109, 21.9813232421875, 37.6363525390625, 59.365631103515625, 79.913818359375, -14.020645141601562, 104.57235717773438, -11.084314346313477, -12.594406127929688, -45.286651611328125, 46.24835205078125, 11.217897415161133, 32.96630096435547, 80.20086669921875, 17.866783142089844, -2.3286895751953125, 141.40924072265625, 4.212158203125, 2.2814712524414062, 179.69842529296875, 35.23431396484375, 25.669403076171875, -30.83099365234375, -26.72515869140625, 75.50375366210938, 67.74456787109375, 208.41995239257812, 68.36285400390625, 232.3062744140625, 46.797119140625, 104.58792114257812, -58.10638427734375, 31.072647094726562, 12.129531860351562, 44.66236877441406, 113.7200927734375, -3.2861099243164062, 112.70677185058594, -48.4261474609375, 40.835968017578125, -136.04144287109375, 97.66525268554688, 52.282196044921875, 174.97607421875, 29.6026611328125, 94.22122192382812, 43.306480407714844, 33.42976379394531, -83.4395751953125, 25.171524047851562, -46.94805908203125, -40.68556213378906, 125.72479248046875, -108.54855346679688, -13.030933380126953, 92.83114624023438, 42.602813720703125, 23.5338134765625, 8.5765380859375, 113.19461059570312, -59.060699462890625, 79.63885498046875, 69.61280059814453, -11.740386962890625, 226.68922424316406, 61.76359558105469, 37.1864013671875, 33.91845703125, -89.36506652832031, 2.533111572265625, 19.25640869140625, 3.153228759765625, -21.020309448242188, 65.58711242675781, 62.401710510253906, -11.772411346435547, 227.94686889648438, 116.27910614013672, 137.15066528320312, -77.56301879882812, 128.41651916503906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000242.npy"}
{"epoch": 0.506806282722513, "step": 243, "batch_size": 128, "mean": 61.323123931884766, "std": 94.62051391601562, "min": -151.55506896972656, "p10": -35.417198181152344, "median": 54.52604675292969, "p90": 179.9768112182617, "max": 558.611083984375, "pos_frac": 0.7421875, "sample": [-74.12213134765625, -34.495086669921875, 90.62339782714844, 30.7108154296875, -16.43536376953125, 88.78469848632812, 184.82659912109375, 44.1070556640625, -34.77241516113281, 64.11039733886719, 346.038330078125, -24.556602478027344, 76.12286376953125, 91.52899169921875, 103.12237548828125, -151.55506896972656, -15.233306884765625, 138.8058624267578, 26.25152587890625, 3.92999267578125, 62.46589660644531, 106.31269836425781, -51.114952087402344, 9.649322509765625, -5.034551620483398, 66.73331451416016, 3.536651611328125, 69.30722045898438, 103.8702163696289, -37.729583740234375, 18.437179565429688, 190.566650390625, 141.17880249023438, 99.46279907226562, 60.9808349609375, 190.34890747070312, 148.20953369140625, 21.52716064453125, 48.069732666015625, 291.32684326171875, 2.1188888549804688, 35.9945068359375, 7.0411376953125, 9.590126037597656, 11.426200866699219, 71.30169677734375, -25.75439453125, -134.80126953125, 135.04490661621094, -50.7535400390625, 41.1295166015625, -83.34893798828125, -51.29998779296875, -36.92169189453125, -39.31324768066406, 52.50927734375, 8.300872802734375, 0.0, 8.452629089355469, 21.49005126953125, 56.542816162109375, 187.005126953125, 60.681182861328125, 79.44607543945312, 23.7591552734375, 127.40267944335938, 46.01678466796875, 112.1024169921875, -42.02394104003906, 119.55062866210938, 33.93414306640625, 89.28948974609375, 51.692779541015625, 86.98126220703125, 558.611083984375, 92.33242797851562, -97.38688659667969, 5.75372314453125, -0.64373779296875, -8.470458984375, -27.50091552734375, 224.1055145263672, 88.47647094726562, 49.38098907470703, -13.694732666015625, 108.75302124023438, 162.3154296875, 71.26194763183594, 191.38681030273438, 177.89833068847656, 45.50709533691406, 56.870025634765625, 81.69502258300781, 92.46109008789062, 201.83758544921875, 137.37356567382812, 0.6349964141845703, -62.92384338378906, 60.9444580078125, -6.69305419921875, 18.53646469116211, 101.6748046875, 203.3433837890625, 9.72760009765625, -11.3206787109375, 58.53240966796875, -0.7092151641845703, 319.4996337890625, 173.30340576171875, 94.50637817382812, 137.00558471679688, -9.010772705078125, -4.043792724609375, 106.72384643554688, 73.12608337402344, 19.425010681152344, -14.646812438964844, 42.3824462890625, 135.5993194580078, 63.65924072265625, 107.6007080078125, -19.279460906982422, 69.04714965820312, 191.46253967285156, 139.79177856445312, 102.14892578125, 84.60498046875, -24.1016845703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000243.npy"}
{"epoch": 0.5089005235602094, "step": 244, "batch_size": 128, "mean": 60.99681091308594, "std": 89.51834106445312, "min": -297.3167419433594, "p10": -36.63990783691406, "median": 60.67912292480469, "p90": 169.0255920410156, "max": 393.92657470703125, "pos_frac": 0.7578125, "sample": [-297.3167419433594, 108.0736083984375, 85.5908203125, -7.3999176025390625, -10.721649169921875, -20.40288543701172, -141.3236083984375, 185.9882049560547, 143.83984375, 117.76275634765625, 111.24591064453125, 105.22369384765625, -6.9901580810546875, -14.07012939453125, 64.7410888671875, 90.711669921875, 16.479644775390625, 71.9808349609375, 145.42193603515625, 160.79498291015625, 225.2694549560547, 17.567607879638672, 182.13348388671875, 173.36773681640625, -5.590728759765625, 117.4964599609375, 104.74502563476562, -40.95854187011719, -21.949100494384766, 87.97412109375, 58.52764892578125, 153.93951416015625, 157.21185302734375, 22.104888916015625, 255.981689453125, -38.7762451171875, 228.4964599609375, 22.720809936523438, 108.60708618164062, 126.81206512451172, 58.130035400390625, 107.04048919677734, -52.759368896484375, -13.8900146484375, 0.0, 114.42166137695312, 46.04742431640625, -103.94775390625, 393.92657470703125, 69.97113037109375, 105.24691772460938, 52.83708190917969, 29.497940063476562, 115.430908203125, 50.06333923339844, 36.151084899902344, 46.390411376953125, 111.336181640625, -10.289459228515625, 209.19468688964844, -7.618831634521484, -18.65692901611328, 15.125938415527344, -76.07307434082031, 99.95306396484375, 41.467315673828125, 95.67291259765625, 38.67645263671875, 6.7593231201171875, 94.0032958984375, 234.06500244140625, 98.48795318603516, 124.7889404296875, 3.3692626953125, -67.72050476074219, 11.426071166992188, 129.78378295898438, 76.38945007324219, -32.603302001953125, 21.166305541992188, -117.05599975585938, 90.9566650390625, 86.66580200195312, 1.7423248291015625, -1.99609375, 121.05047607421875, 73.388427734375, 24.667251586914062, 198.58700561523438, 0.94500732421875, 28.20208740234375, -15.71844482421875, 55.353858947753906, 38.239959716796875, 175.56060791015625, -35.724334716796875, 65.21890258789062, 15.243850708007812, 73.96345520019531, 97.69561767578125, 167.1646728515625, 50.19325256347656, 3.44268798828125, 5.6295623779296875, 148.6109619140625, -24.93048095703125, 27.197921752929688, -46.59626770019531, 62.830596923828125, -46.92939758300781, -123.00372314453125, 134.1781005859375, 91.3782958984375, -18.13470458984375, 78.6788330078125, 104.50172424316406, 142.64761352539062, 102.35773468017578, -59.39404296875, 57.368316650390625, 202.02914428710938, 96.5670166015625, 144.07574462890625, 185.6239471435547, 52.600341796875, 66.653564453125, 74.72525024414062, 52.56549072265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000244.npy"}
{"epoch": 0.5109947643979058, "step": 245, "batch_size": 128, "mean": 57.73851013183594, "std": 82.6352310180664, "min": -209.07196044921875, "p10": -30.65267105102539, "median": 54.38825607299805, "p90": 164.6157257080078, "max": 260.55975341796875, "pos_frac": 0.8046875, "sample": [29.39739990234375, -18.04583740234375, 91.45327758789062, 31.87908172607422, 113.81097412109375, 105.89132690429688, 47.60581970214844, 120.38613891601562, 181.57147216796875, 165.23153686523438, 25.059661865234375, 169.80548095703125, 75.99349212646484, -32.07781982421875, 1.425537109375, 6.573419570922852, 77.27127075195312, 128.24655151367188, 80.0439224243164, -97.59199523925781, 164.351806640625, 60.746795654296875, 184.40890502929688, 75.57090759277344, -17.162139892578125, 101.23799896240234, 38.26080322265625, 37.621971130371094, 121.9677734375, 59.28485107421875, 196.618896484375, 31.526123046875, 6.65582275390625, 51.47540283203125, 58.40472412109375, -13.12353515625, 40.401275634765625, 149.6136474609375, 125.20965576171875, 52.791473388671875, -155.89964294433594, -81.58602905273438, -58.11946105957031, -40.4976806640625, -85.07421875, 260.03131103515625, 104.25360107421875, -30.041893005371094, 79.30624389648438, -34.68522644042969, 14.547096252441406, 74.26118469238281, 25.351470947265625, 73.23155212402344, 74.65788269042969, -209.07196044921875, 18.811279296875, -70.78550720214844, 18.39361572265625, 132.98333740234375, 45.04743957519531, 7.6845855712890625, 70.1373291015625, -115.90646362304688, 50.357513427734375, 132.33831787109375, 51.535430908203125, 42.693603515625, 144.35525512695312, 44.832122802734375, 19.64617919921875, 56.06849670410156, 156.0698699951172, 64.55499267578125, 0.0, 46.5401611328125, 77.80726623535156, 95.37260437011719, 140.67137145996094, 118.18562316894531, 9.337324142456055, 82.0877685546875, 5.5301666259765625, -67.44464111328125, -4.3948211669921875, 87.6002197265625, 103.14804077148438, 82.31964111328125, 122.14970397949219, 4.53582763671875, 132.87188720703125, 69.90536499023438, 105.8658447265625, 11.000442504882812, 143.90963745117188, 146.00473022460938, 2.512664794921875, 114.24777221679688, 260.55975341796875, 1.953857421875, 35.562744140625, 68.40805053710938, 42.090087890625, 97.57943725585938, 172.72491455078125, -19.037017822265625, 226.40005493164062, 122.52932739257812, 191.80670166015625, 25.13903045654297, 51.629913330078125, 53.55036926269531, 2.484344482421875, 10.83127212524414, 227.93954467773438, 102.80117797851562, 55.22614288330078, 56.73780822753906, -11.73333740234375, -26.346092224121094, -29.985687255859375, 165.45620727539062, -22.9290771484375, 225.29873657226562, -165.54046630859375, 1.589223861694336, 0.0, 128.76242065429688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000245.npy"}
{"epoch": 0.5130890052356021, "step": 246, "batch_size": 128, "mean": 42.68506622314453, "std": 91.2027359008789, "min": -216.33392333984375, "p10": -67.85047988891601, "median": 32.38523483276367, "p90": 169.94776916503906, "max": 281.6607666015625, "pos_frac": 0.6796875, "sample": [42.573974609375, 32.766815185546875, 106.38801574707031, -41.546112060546875, 209.89422607421875, 33.54502868652344, 9.792724609375, -18.860733032226562, -22.384628295898438, 151.02102661132812, 233.48211669921875, -131.98434448242188, -24.574462890625, 47.932373046875, 22.0013427734375, 122.43975830078125, -57.19450378417969, 13.294601440429688, 73.62066650390625, 87.58746337890625, -25.97766876220703, -15.90350341796875, 0.0, 64.30426025390625, -81.338134765625, -50.92047119140625, 63.468994140625, 55.178131103515625, 162.5384521484375, 102.580810546875, 26.708236694335938, 96.03912353515625, 119.60955810546875, -68.91497802734375, 251.58163452148438, 6.89154052734375, -101.71469116210938, 112.25881958007812, 30.5118408203125, 0.5660877227783203, 23.24462890625, 150.75604248046875, 41.92340087890625, 125.12520599365234, -38.80546569824219, 51.768798828125, 142.56954956054688, -131.6197052001953, -15.693008422851562, 179.55853271484375, 281.6607666015625, 3.599365234375, 157.02566528320312, 169.71417236328125, 17.681106567382812, 56.17964172363281, 18.545970916748047, -71.04873657226562, -86.96115112304688, -0.99163818359375, -74.28532409667969, -86.14707946777344, -21.31219482421875, 22.42340087890625, 39.2880973815918, 271.42047119140625, -45.61940002441406, 47.16405487060547, 66.912109375, -77.512451171875, -3.5545654296875, 175.7659912109375, 49.98162841796875, -17.714324951171875, 112.38128662109375, -38.776123046875, 38.1690673828125, -67.5644760131836, -216.33392333984375, 2.9324588775634766, -39.91839599609375, -6.7080078125, 11.05523681640625, 35.6539306640625, 195.7057647705078, 106.03425598144531, -28.552825927734375, 111.29296875, 124.55548095703125, 35.194915771484375, 32.7310791015625, 127.1783447265625, 175.8878173828125, 85.17996215820312, 83.23536682128906, 120.11264038085938, 27.757293701171875, 20.51992416381836, 7.682929992675781, 3.4351806640625, -68.517822265625, -36.329345703125, 32.039390563964844, -3.83319091796875, 8.115814208984375, 14.56195068359375, 71.4127197265625, -45.388214111328125, 266.170166015625, -62.15887451171875, 160.25396728515625, -84.16046142578125, 227.47564697265625, -20.209320068359375, -66.57893371582031, 34.221405029296875, 96.65777587890625, 91.37094116210938, 64.59022521972656, 3.643890380859375, 42.265769958496094, -35.09526062011719, 137.03945922851562, 206.74789428710938, 50.73307800292969, 32.038299560546875, 58.97929382324219, 170.49282836914062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000246.npy"}
{"epoch": 0.5151832460732985, "step": 247, "batch_size": 128, "mean": 47.413230895996094, "std": 85.02862548828125, "min": -164.37490844726562, "p10": -42.74706497192381, "median": 31.308963775634766, "p90": 160.27019653320312, "max": 319.09051513671875, "pos_frac": 0.7265625, "sample": [149.2858428955078, -84.10235595703125, 109.74954223632812, 139.83517456054688, 4.619903564453125, 272.0952453613281, 86.85224151611328, -68.82162475585938, -8.8592529296875, 9.127166748046875, -17.341339111328125, 68.28065490722656, 10.733131408691406, -86.5543212890625, 22.40576171875, 54.76853942871094, 84.5843505859375, -34.88154602050781, 112.60003662109375, -38.517852783203125, 80.74411010742188, 128.41421508789062, 167.11785888671875, -27.861907958984375, 121.50341796875, 36.905052185058594, 116.9554443359375, -16.80059814453125, 19.655845642089844, 81.3125, 1.7510242462158203, -23.260398864746094, -60.803070068359375, 64.70130920410156, 104.39474487304688, 82.61922454833984, 3.6044464111328125, 204.34579467773438, -12.7947998046875, 13.523605346679688, -8.775955200195312, 159.23944091796875, -14.442474365234375, -13.513084411621094, -34.65180969238281, 12.9569091796875, -84.52584838867188, 170.67282104492188, -13.458892822265625, -118.42019653320312, -25.48590087890625, 319.09051513671875, 28.934303283691406, 31.37787628173828, 44.83270263671875, 96.8505859375, -164.37490844726562, 6.527618408203125, 20.41602325439453, 1.7391357421875, 131.37286376953125, -30.04922866821289, -106.02536010742188, 3.1466064453125, 74.78634643554688, 5.666461944580078, 0.0, 187.61712646484375, -52.61522674560547, -27.104278564453125, 31.24005126953125, 1.566802978515625, 59.287139892578125, 60.97999572753906, 70.4818115234375, -9.698883056640625, 107.74359130859375, 64.93551635742188, 146.33551025390625, -13.031791687011719, -133.64242553710938, 6.950153350830078, 91.62893676757812, 15.731246948242188, 136.20806884765625, -7.21551513671875, 57.724853515625, -75.40261840820312, 104.8870849609375, 74.33599853515625, 57.6802978515625, 165.60398864746094, 115.74909973144531, 68.58419799804688, 80.57350158691406, -5.435272216796875, 170.9142608642578, 24.705322265625, 22.504241943359375, 169.09994506835938, 94.78533935546875, 96.61676025390625, -3.863971710205078, 20.5562744140625, 38.994873046875, 162.67529296875, 82.84829711914062, 166.6827392578125, -115.30949401855469, 21.47650146484375, 15.186004638671875, 264.48614501953125, 57.15343475341797, 5.05682373046875, 143.5484619140625, 83.96855163574219, 293.34881591796875, 68.94845581054688, -87.13320922851562, 71.2805404663086, 28.2999267578125, 53.82225036621094, 24.247802734375, 129.81130981445312, 123.66596984863281, 27.016525268554688, 14.513641357421875, 51.51324462890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000247.npy"}
{"epoch": 0.5172774869109947, "step": 248, "batch_size": 128, "mean": 46.14619445800781, "std": 83.82504272460938, "min": -232.88739013671875, "p10": -59.45089645385742, "median": 36.025047302246094, "p90": 140.6842056274414, "max": 316.61822509765625, "pos_frac": 0.75, "sample": [19.53032684326172, 102.0255126953125, 2.7709426879882812, 121.39404296875, 127.0853271484375, -18.04473876953125, 68.45235443115234, 81.76551818847656, 149.83163452148438, 21.58294677734375, -4.606781005859375, 24.35289192199707, 52.82630920410156, 17.12847900390625, 29.0526123046875, 225.63668823242188, 120.08270263671875, 58.4146728515625, 222.87457275390625, 126.41746520996094, 118.55657958984375, 35.875640869140625, 40.362335205078125, 99.00857543945312, 77.4608154296875, 111.74835205078125, 44.45482635498047, 35.61907958984375, -13.4713134765625, -69.34828186035156, 316.61822509765625, 70.3837890625, 129.20916748046875, -16.8480224609375, 36.53936767578125, 18.56854248046875, 114.24862670898438, -232.88739013671875, 3.2415733337402344, -58.32849884033203, -38.46575164794922, 96.004638671875, 7.3609161376953125, 141.7130889892578, 55.7601318359375, 112.40089416503906, 287.080810546875, 15.010498046875, -51.165611267089844, 107.90728759765625, 119.80303955078125, -37.29913330078125, 2.7554683685302734, -130.99832153320312, 140.24325561523438, 18.129547119140625, 41.93352508544922, 132.3318328857422, -62.06982421875, -69.343017578125, 36.731658935546875, 104.86431884765625, 60.774993896484375, 8.623046875, 118.932373046875, 24.921730041503906, 24.57550048828125, 34.133087158203125, -28.9171142578125, 8.376152038574219, 95.26246643066406, -67.28692626953125, 115.63790893554688, 67.1185302734375, 105.59735107421875, 29.614181518554688, 5.09954833984375, 2.952159881591797, 80.09213256835938, 118.69802856445312, 9.191753387451172, 36.17445373535156, 16.48345947265625, -127.27639770507812, 64.36447143554688, 166.49822998046875, 104.81399536132812, -75.21348571777344, 10.638965606689453, 117.52230834960938, 31.520599365234375, -7.9842529296875, -20.485618591308594, -20.789039611816406, 29.308624267578125, -0.3897705078125, 91.31234741210938, 4.956207275390625, -31.721527099609375, 46.29583740234375, -54.7608642578125, 189.769287109375, 68.05931091308594, -72.41958618164062, -4.668792724609375, 155.19699096679688, 71.34394836425781, -105.81416320800781, 76.13522338867188, 164.05506896972656, -33.76617431640625, -18.336563110351562, 0.0, 101.73190307617188, 178.535888671875, 125.84048461914062, 23.860076904296875, -71.82247924804688, 23.553451538085938, 19.65545654296875, 153.3446044921875, 112.37394714355469, 162.72640991210938, -71.525634765625, 28.6226806640625, -153.1605224609375, 96.0238265991211, 52.45787048339844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000248.npy"}
{"epoch": 0.5193717277486911, "step": 249, "batch_size": 128, "mean": 45.01103591918945, "std": 93.71794128417969, "min": -305.5341796875, "p10": -61.93985595703125, "median": 33.790225982666016, "p90": 170.37308807373046, "max": 274.10223388671875, "pos_frac": 0.6953125, "sample": [-81.47142028808594, 43.355220794677734, -63.024078369140625, -26.383758544921875, 73.7138671875, -102.65406799316406, 26.653900146484375, 32.383384704589844, 51.670562744140625, 98.11843872070312, 14.837890625, 90.9146728515625, 130.45547485351562, 60.55335998535156, -114.81890869140625, 49.22260284423828, 65.922607421875, 1.5655593872070312, 125.21939086914062, -5.2053680419921875, 102.69259643554688, 120.72160339355469, -34.53265380859375, -65.33943176269531, 197.61550903320312, 79.89227294921875, 41.663124084472656, 32.928741455078125, 4.40643310546875, 10.032859802246094, 43.67529296875, -18.105255126953125, 13.3934326171875, 5.0902252197265625, -11.112457275390625, -63.31585693359375, 202.13873291015625, 1.7008895874023438, 98.50473022460938, -126.68716430664062, 49.757476806640625, -305.5341796875, 157.4890899658203, 109.868408203125, 180.24301147460938, 32.37781524658203, 176.02947998046875, 123.99566650390625, 79.12068939208984, 34.54749298095703, -29.265289306640625, 24.692359924316406, 274.10223388671875, -175.787109375, 16.8144588470459, 6.5184326171875, 106.2838134765625, 63.21356201171875, -36.43006896972656, -67.17936706542969, 66.03486633300781, 49.61053466796875, 26.491477966308594, 33.032958984375, -44.57240295410156, 27.652572631835938, -22.931114196777344, 5.895851135253906, 74.10043334960938, 156.019287109375, -11.495010375976562, 171.54714965820312, -15.66619873046875, 269.59796142578125, 83.26101684570312, -89.17903137207031, 24.614940643310547, 79.2956771850586, -46.6561279296875, 14.620407104492188, 151.30715942382812, 118.88528442382812, 50.86863708496094, 102.042236328125, 83.23690032958984, 204.77496337890625, -19.217605590820312, -131.08837890625, 9.635116577148438, 110.76446533203125, 4.3560028076171875, -23.979248046875, 136.54721069335938, 112.92947387695312, 135.57798767089844, -15.2940673828125, 128.84359741210938, 168.53826904296875, -39.579132080078125, 190.388671875, 211.04299926757812, -11.11529541015625, -54.95936584472656, -32.07444763183594, -45.573516845703125, 40.04689025878906, 56.069976806640625, -61.475189208984375, 213.30215454101562, -41.1141357421875, -1.78094482421875, 4.290191650390625, 169.31610107421875, 150.52813720703125, 169.8699188232422, 139.078369140625, -35.92633056640625, -40.78328323364258, -90.00247192382812, 27.065982818603516, 195.2391357421875, 92.44357299804688, 115.42416381835938, 121.12771606445312, -61.20446014404297, 17.7750244140625, 44.1275749206543, 212.61029052734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000249.npy"}
{"epoch": 0.5214659685863874, "step": 250, "batch_size": 128, "mean": 63.330284118652344, "std": 96.65293884277344, "min": -195.19529724121094, "p10": -49.91085968017578, "median": 55.50809860229492, "p90": 188.1075439453125, "max": 295.726806640625, "pos_frac": 0.7421875, "sample": [58.62257385253906, 7.996490478515625, -155.84901428222656, 144.8931884765625, 150.32781982421875, 288.60723876953125, -36.62242889404297, 71.1156005859375, 70.44525146484375, 62.58538818359375, 125.14910888671875, 252.7425537109375, -4.828420639038086, 74.99400329589844, 60.97886657714844, 36.43194580078125, 87.06119537353516, 60.95574951171875, 155.01242065429688, 39.49168395996094, 49.93150329589844, -57.81603240966797, -19.4376220703125, 186.49513244628906, 60.73511505126953, -26.1075439453125, 146.04595947265625, -102.17364501953125, 278.4222412109375, -1.7381591796875, 100.05023193359375, 129.56076049804688, 65.0196533203125, 187.469970703125, 168.31231689453125, 114.37223052978516, -49.52484130859375, -7.026576995849609, 168.68630981445312, -80.93707275390625, 203.47100830078125, 40.34259033203125, 51.68663024902344, 133.43577575683594, 52.953521728515625, 41.890594482421875, 48.6475830078125, 44.59222412109375, -14.655620574951172, 176.88804626464844, 99.14132690429688, 114.15887451171875, 153.83343505859375, 74.4747543334961, 79.456298828125, 115.69937133789062, 176.37835693359375, 82.67752075195312, 169.3120574951172, -94.45877075195312, 201.23892211914062, 23.718719482421875, 30.35693359375, 93.99705505371094, 118.72578430175781, -24.79068946838379, 7.412147521972656, 53.70342254638672, -7.544822692871094, -61.9271240234375, 236.7186279296875, 95.53767395019531, -61.10638427734375, -14.457550048828125, -195.19529724121094, 27.945053100585938, 43.108741760253906, -57.431640625, -18.052764892578125, -50.81156921386719, 15.670684814453125, 122.69038391113281, 34.4942626953125, -43.0047607421875, 35.295711517333984, 199.20457458496094, 7.871337890625, -10.058502197265625, 86.56246948242188, 212.26663208007812, 50.02410888671875, 287.310546875, 51.73017883300781, 136.04290771484375, 63.53326416015625, -45.27790832519531, 295.726806640625, 225.729248046875, 22.59979248046875, 277.68206787109375, 75.63900756835938, 82.22119140625, -142.67807006835938, -20.49114990234375, 9.47979736328125, 48.7149658203125, 150.494140625, -27.63690185546875, 171.20774841308594, 77.49293518066406, 153.83030700683594, -43.423336029052734, 80.58938598632812, 189.59521484375, 160.2430419921875, 66.93052673339844, 14.480308532714844, 9.904083251953125, 6.4696044921875, 83.73626708984375, -56.758888244628906, 17.295272827148438, -156.300048828125, 24.776138305664062, 16.893798828125, -13.86126708984375, 57.312774658203125, -7.496051788330078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000250.npy"}
{"epoch": 0.5235602094240838, "step": 251, "batch_size": 128, "mean": 52.89482498168945, "std": 95.30791473388672, "min": -149.35723876953125, "p10": -50.67664566040039, "median": 32.07188415527344, "p90": 168.84320983886718, "max": 432.321044921875, "pos_frac": 0.734375, "sample": [17.39349365234375, 2.0499267578125, 50.122406005859375, -50.273765563964844, 13.102615356445312, 119.9627685546875, 61.8553466796875, -46.17627716064453, 25.279937744140625, -8.745376586914062, 17.252578735351562, 136.267822265625, 13.641014099121094, 54.475372314453125, -45.17082214355469, 85.93023681640625, 54.829498291015625, -32.47547149658203, 2.8837203979492188, 8.70138168334961, 229.8843994140625, 89.56423950195312, 131.10830688476562, -55.187782287597656, 31.302978515625, 34.63075256347656, -0.9875221252441406, -16.453994750976562, -39.05230712890625, 34.4853515625, -6.517173767089844, -62.47950744628906, 57.44970703125, 16.202606201171875, 19.988677978515625, -72.20014953613281, 171.3680419921875, 18.501117706298828, -24.226730346679688, -53.974853515625, -21.363449096679688, 28.381179809570312, -39.600433349609375, 145.5181884765625, 432.321044921875, 75.39590454101562, 55.22541809082031, 71.86103057861328, 53.681884765625, -15.766944885253906, 167.76113891601562, 18.93267822265625, 114.29586791992188, 51.941070556640625, 140.35787963867188, 8.3612060546875, 112.37993621826172, 63.98674774169922, 129.09716796875, -47.96330261230469, -62.2958984375, -23.98815155029297, -27.286361694335938, 183.35446166992188, 78.4007568359375, 123.08990478515625, 12.344470977783203, 119.94393920898438, 67.103515625, 84.74139404296875, 111.44512939453125, 262.3028259277344, 45.42779541015625, 172.18048095703125, -69.3125, 126.14663696289062, 22.0098876953125, 66.1673583984375, 33.86865234375, -63.040565490722656, 110.47651672363281, 14.103687286376953, 180.74391174316406, 3.82781982421875, 206.42745971679688, 56.81512451171875, 156.96226501464844, -45.58949279785156, -124.92036437988281, 32.514495849609375, 18.690277099609375, 418.81591796875, 210.39511108398438, 37.18495178222656, 250.384033203125, 128.50338745117188, 93.85177612304688, 41.60191345214844, -5.9905548095703125, 18.79151153564453, 50.5443115234375, -87.69110107421875, -85.98834228515625, -68.4061050415039, 113.74462890625, 25.47003173828125, 58.51226806640625, 31.6292724609375, 157.75485229492188, -149.35723876953125, 15.803573608398438, 92.95755004882812, 8.145637512207031, 8.35638427734375, 25.08966827392578, 98.67523193359375, 31.425338745117188, -51.61669921875, -3.63580322265625, 173.00262451171875, 122.994140625, 28.52069091796875, 386.1462097167969, -0.5553112030029297, -14.2994384765625, 30.235084533691406, 95.83439636230469, 81.93562316894531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000251.npy"}
{"epoch": 0.5256544502617801, "step": 252, "batch_size": 128, "mean": 56.3546028137207, "std": 96.58229064941406, "min": -228.30801391601562, "p10": -63.67615966796875, "median": 54.61592102050781, "p90": 175.68762512207027, "max": 288.35614013671875, "pos_frac": 0.7265625, "sample": [157.43289184570312, 225.5907440185547, 201.87689208984375, 121.80618286132812, 122.0980224609375, 60.19110107421875, -98.04845428466797, 92.49288940429688, 101.74828338623047, 136.37506103515625, 148.7221221923828, 80.212890625, 81.22198486328125, -38.566246032714844, 126.2535400390625, -11.501220703125, 28.600494384765625, 60.55589294433594, 131.6814422607422, 46.41650390625, -2.2178497314453125, 281.4906921386719, 72.80865478515625, 40.53662109375, 127.533203125, 55.56787109375, 215.1524658203125, 163.88375854492188, 110.82650756835938, -62.1707763671875, -12.01580810546875, 9.729827880859375, -7.441978454589844, 29.049468994140625, -17.502349853515625, 43.135772705078125, -215.560302734375, -34.45757293701172, 49.439666748046875, 204.72686767578125, -4.19879150390625, 172.19546508789062, -6.856658935546875, 286.49285888671875, 194.18154907226562, 156.8308563232422, 2.7005615234375, 35.05810546875, 96.78311157226562, 128.27969360351562, -10.97314453125, 183.83599853515625, -72.16998291015625, 114.8305435180664, 117.16763305664062, -69.864013671875, 32.023712158203125, -22.501388549804688, 0.0, 90.84727478027344, 84.81736755371094, 191.67214965820312, -70.15188598632812, 152.03018188476562, 17.136079788208008, -77.45755004882812, 83.42398834228516, -200.50437927246094, 111.374267578125, -7.0086669921875, 2.5551986694335938, 49.119720458984375, -228.30801391601562, -7.812187194824219, 3.46881103515625, 142.8704833984375, 158.64254760742188, -67.188720703125, 81.33404541015625, -29.548721313476562, 144.70770263671875, 40.961578369140625, 21.984756469726562, 71.86007690429688, 123.75917053222656, 33.2747802734375, 28.483978271484375, -162.25830078125, -115.79547119140625, 185.42832946777344, 0.49016571044921875, 155.54800415039062, 80.878662109375, 122.95445251464844, 98.738525390625, 23.49542236328125, 130.2650146484375, 113.60989379882812, 288.35614013671875, 64.7999267578125, 5.3280029296875, 50.81141662597656, 28.9891357421875, -6.151702880859375, -19.2496337890625, 7.1710205078125, 81.0273208618164, 133.53750610351562, -76.43330383300781, -57.51871871948242, 53.663970947265625, 197.39007568359375, -49.63861083984375, 28.06329345703125, 202.8404541015625, 1.0540313720703125, 107.61700439453125, 65.09150695800781, -10.72991943359375, -146.19976806640625, 59.275146484375, 25.8992919921875, 34.5272216796875, 118.9261474609375, -11.613983154296875, 63.475013732910156, 104.97252655029297, 126.91824340820312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000252.npy"}
{"epoch": 0.5277486910994764, "step": 253, "batch_size": 128, "mean": 51.64704132080078, "std": 99.68474578857422, "min": -203.62164306640625, "p10": -69.20093536376953, "median": 37.53251266479492, "p90": 182.14558105468748, "max": 371.3601989746094, "pos_frac": 0.6953125, "sample": [241.17977905273438, 8.750244140625, 43.92200469970703, 39.15087890625, -121.32194519042969, 3.351654052734375, 180.0789794921875, 83.29837036132812, 100.32843017578125, -107.97300720214844, 190.36404418945312, 20.375244140625, -62.83216857910156, 102.39251708984375, 254.99993896484375, 25.829193115234375, 61.327239990234375, 101.71556091308594, 1.400299072265625, -2.86126708984375, -83.89938354492188, 154.40460205078125, 85.76811218261719, 32.50584411621094, 18.286510467529297, 8.073333740234375, 133.71514892578125, 110.13720703125, 128.9246826171875, -61.04319763183594, 100.86250305175781, 21.724822998046875, -6.495635986328125, -10.841705322265625, 3.81317138671875, 371.3601989746094, -68.71586608886719, 102.55398559570312, -145.4790802001953, -18.79608154296875, 82.10702514648438, -9.565277099609375, 86.60454559326172, 66.467041015625, 119.48406982421875, 71.49903869628906, 82.68923950195312, 13.70684814453125, 262.83575439453125, -112.83758544921875, 25.666915893554688, -33.422698974609375, 180.0720672607422, 39.78538513183594, -70.332763671875, -27.1876220703125, 112.47610473632812, -22.41876220703125, -32.86549377441406, -25.902313232421875, 45.430694580078125, 157.66908264160156, 106.86956787109375, 242.90536499023438, 278.03643798828125, 33.751129150390625, 46.90252685546875, 87.61437225341797, 175.8109130859375, 7.503692626953125, 167.80056762695312, 131.8366241455078, 29.09637451171875, -203.62164306640625, 48.495025634765625, 169.66220092773438, -12.651451110839844, 35.914146423339844, -49.918304443359375, 61.8875732421875, -73.28016662597656, 85.86019897460938, -60.446197509765625, 7.873847961425781, 112.78076171875, 151.86134338378906, 24.336898803710938, 247.89785766601562, -89.37890625, 199.45840454101562, 61.137451171875, -46.28306579589844, 26.083099365234375, 258.458984375, 206.49893188476562, 140.44361877441406, -4.309326171875, -30.276214599609375, 92.147705078125, -89.58059692382812, 54.217132568359375, -93.94065856933594, 123.89163208007812, -27.036834716796875, 53.80687713623047, 107.3149185180664, 19.618074417114258, -60.007537841796875, 192.9717559814453, -102.87059020996094, -19.640548706054688, 186.9676513671875, -102.48052978515625, 3.7166500091552734, -19.047103881835938, -10.37548828125, 72.11764526367188, 12.745899200439453, 54.41357421875, 90.16159057617188, 105.55935668945312, 18.47064208984375, 11.52093505859375, 174.0670166015625, -29.838287353515625, 6.9194488525390625, -10.131782531738281, 166.23428344726562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000253.npy"}
{"epoch": 0.5298429319371728, "step": 254, "batch_size": 128, "mean": 58.82168197631836, "std": 88.81324768066406, "min": -142.89227294921875, "p10": -39.81251068115234, "median": 50.314674377441406, "p90": 164.76879272460937, "max": 289.9714660644531, "pos_frac": 0.7109375, "sample": [162.77859497070312, 5.17364501953125, -142.89227294921875, 60.03423309326172, -84.43515014648438, 131.66744995117188, 72.0169906616211, 118.06304931640625, 30.726318359375, 108.56292724609375, 16.1680908203125, -61.916259765625, 279.6971435546875, 165.94406127929688, -2.2625350952148438, 4.193109512329102, 89.268798828125, 98.4970703125, 65.85202026367188, 95.08908081054688, 16.556703567504883, 289.9714660644531, -11.510345458984375, -5.8575439453125, -18.84515380859375, -8.278900146484375, 100.19317626953125, 196.3717041015625, 134.8776092529297, 236.22735595703125, 93.68014526367188, 12.43017578125, -58.52392578125, 0.9957332611083984, 196.87879943847656, -39.163612365722656, -78.99214172363281, 113.6096420288086, 10.880096435546875, -32.60686492919922, -61.0269775390625, 129.355712890625, 13.623359680175781, 18.81341552734375, 12.565303802490234, 156.9632568359375, 105.21484375, 9.24639892578125, 182.65304565429688, 140.31985473632812, -35.589874267578125, 19.574432373046875, -24.041305541992188, 70.5858154296875, 20.822021484375, -1.8093395233154297, 121.04383850097656, 164.26510620117188, 116.63397216796875, 56.148468017578125, -121.55905151367188, -10.920623779296875, 133.82470703125, 28.132095336914062, 31.515615463256836, 12.233715057373047, -37.27676773071289, -34.350372314453125, -18.33837890625, 151.7050018310547, 112.1708984375, -82.2730712890625, 28.098297119140625, 14.1654052734375, 137.4793701171875, -11.00469970703125, 112.339111328125, 131.06405639648438, 138.748291015625, 86.34402465820312, 114.04666137695312, 46.51567077636719, 124.42518615722656, -17.765777587890625, 0.0, -52.311981201171875, 187.31338500976562, 118.85324096679688, 2.14520263671875, 275.67279052734375, 16.157588958740234, 195.81210327148438, -61.995849609375, -60.583984375, 29.8486328125, 271.0367126464844, -21.219818115234375, 28.623046875, -21.66796112060547, 105.78884887695312, -38.71228790283203, -12.681327819824219, 75.19918823242188, 159.53457641601562, 129.70599365234375, 78.0086669921875, 56.82464599609375, 249.07760620117188, 58.686973571777344, -35.752655029296875, 151.2994384765625, 2.1914901733398438, -8.614349365234375, 43.37835693359375, -62.95917510986328, -30.261680603027344, 54.113677978515625, 115.88945007324219, 20.016456604003906, -41.32660675048828, 112.1943359375, 145.7310028076172, 226.8733367919922, 55.526611328125, 55.62384033203125, 132.42599487304688, 106.41143798828125, 71.49723815917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000254.npy"}
{"epoch": 0.5319371727748691, "step": 255, "batch_size": 128, "mean": 65.27584838867188, "std": 104.38836669921875, "min": -300.1837158203125, "p10": -55.10045471191406, "median": 62.3387451171875, "p90": 209.1187561035156, "max": 365.3948974609375, "pos_frac": 0.734375, "sample": [-23.828615188598633, -51.149017333984375, 115.02410888671875, 230.09317016601562, 112.65687561035156, 38.885528564453125, 87.97262573242188, 22.537109375, -9.550849914550781, 43.027099609375, 264.2332763671875, 12.63531494140625, 144.12811279296875, 27.586105346679688, 150.54248046875, 164.98486328125, 59.3677978515625, 79.78858184814453, 13.443126678466797, -70.11945343017578, 180.544189453125, 88.64572143554688, 35.029449462890625, 42.00895690917969, 15.008377075195312, 78.36044311523438, -127.01821899414062, 236.71029663085938, -53.93220520019531, 186.92074584960938, 216.14215087890625, 4.271820068359375, 52.8297119140625, -142.1639404296875, -42.55389404296875, 99.879638671875, 181.07049560546875, 96.01382446289062, -116.59970092773438, -23.488861083984375, 254.59494018554688, -74.75849914550781, 49.04083251953125, 107.63140869140625, 206.1663818359375, 81.6922607421875, 166.97427368164062, -24.580780029296875, 218.5885772705078, 20.681549072265625, 108.5885009765625, -15.00238037109375, 6.788356781005859, -59.729034423828125, -9.230049133300781, -9.184371948242188, 44.417022705078125, 176.22174072265625, 63.18994140625, 142.45094299316406, -36.5885009765625, -45.2713623046875, 73.31500244140625, 98.55953979492188, -30.7001953125, 132.62149047851562, -24.87255859375, -29.97473907470703, 220.85546875, 187.30764770507812, 247.95333862304688, 74.00662231445312, 61.487548828125, 155.93276977539062, 83.5719985961914, 151.09829711914062, 66.49871826171875, -42.132598876953125, -131.53765869140625, 67.10948181152344, 2.81683349609375, -92.95954895019531, 7.112030029296875, 61.24220275878906, 121.88568115234375, 129.127685546875, -70.29878234863281, 148.36282348632812, -3.0893402099609375, 23.554107666015625, 158.35391235351562, 66.53656005859375, 47.702552795410156, 120.18466186523438, -1.290130615234375, 152.8531494140625, 30.56222152709961, 34.91094970703125, 365.3948974609375, 330.3121337890625, -120.82475280761719, -57.82637023925781, -78.52459716796875, 72.08511352539062, -16.123931884765625, 60.387786865234375, 124.53009033203125, 81.15093994140625, 42.78570556640625, 216.00762939453125, 76.35137939453125, 101.20469665527344, -3.166370391845703, 80.56314086914062, 80.14923095703125, 99.84403991699219, 53.88935852050781, 132.29119873046875, 12.1312255859375, -44.89508056640625, 146.44088745117188, 259.3277587890625, 2.541473388671875, 240.1309814453125, -300.1837158203125, 189.6549072265625, 18.623626708984375, 99.76806640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000255.npy"}
{"epoch": 0.5340314136125655, "step": 256, "batch_size": 128, "mean": 61.21720504760742, "std": 114.58416748046875, "min": -556.8269653320312, "p10": -66.64862823486327, "median": 67.72588348388672, "p90": 189.12059173583984, "max": 325.9127502441406, "pos_frac": 0.7421875, "sample": [-6.2877197265625, 148.79443359375, 159.24722290039062, -50.70068359375, -73.18389892578125, 39.546875, 74.97283935546875, -13.5877685546875, 204.14720153808594, -181.32940673828125, 152.79820251464844, 195.75059509277344, -72.5224609375, 207.32424926757812, -2.0504989624023438, 29.770065307617188, 0.9321556091308594, 51.288856506347656, 66.07928466796875, -94.25252532958984, 5.9402008056640625, -16.080078125, 32.52642059326172, 70.63092041015625, 186.88787841796875, 108.57432556152344, 163.05520629882812, 13.089996337890625, 167.44696044921875, 57.35813903808594, 13.341226577758789, -37.21324157714844, 160.15411376953125, 79.19046020507812, 124.88418579101562, 105.65863037109375, -53.6492919921875, -176.49664306640625, 125.84982299804688, 43.186866760253906, 99.85340881347656, 186.1274871826172, -39.649085998535156, 204.10006713867188, 167.20645141601562, 150.32424926757812, -93.62283325195312, 9.509281158447266, 108.09109497070312, 52.194374084472656, -42.996612548828125, -156.32369995117188, 58.0367431640625, 7.846891403198242, 214.1898193359375, 88.4581298828125, 91.7017822265625, 137.09783935546875, -72.353271484375, 30.731918334960938, 127.04934692382812, -63.06268310546875, -19.60749053955078, -102.24559020996094, 127.267333984375, -19.291580200195312, 90.87811279296875, -39.27192687988281, 288.77032470703125, 143.026611328125, 69.37248229980469, 141.50006103515625, 49.0118408203125, 225.5814208984375, 153.7357635498047, -556.8269653320312, -179.80526733398438, -43.5845947265625, 114.44062042236328, 114.32455444335938, 180.96917724609375, 79.6646728515625, 190.4401092529297, 325.9127502441406, 120.60400390625, 89.02777099609375, 112.73779296875, 125.68344116210938, -24.850006103515625, 201.79046630859375, -64.20378112792969, 46.20908737182617, 138.1436309814453, -2.496917724609375, 57.83917236328125, 79.67277526855469, 15.510009765625, -83.57627868652344, 10.190057754516602, 134.80906677246094, 53.15338134765625, 60.317047119140625, 17.581741333007812, 26.504531860351562, 198.51217651367188, 62.707275390625, 150.62701416015625, 81.70185852050781, 197.30628967285156, 14.817489624023438, 113.65570068359375, 173.02801513671875, -23.901611328125, 107.318603515625, 16.242523193359375, 138.12408447265625, -56.971656799316406, -186.77816772460938, 56.275482177734375, 168.05613708496094, 118.806884765625, 278.81756591796875, -17.121856689453125, 188.55508422851562, 42.38250732421875, 40.395957946777344, 96.407470703125, 152.34397888183594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000256.npy"}
{"epoch": 0.5361256544502618, "step": 257, "batch_size": 128, "mean": 59.49257278442383, "std": 88.4520492553711, "min": -146.96786499023438, "p10": -35.7125862121582, "median": 55.081024169921875, "p90": 191.62069702148435, "max": 341.2801513671875, "pos_frac": 0.734375, "sample": [59.76826477050781, 189.05255126953125, 167.2533416748047, 64.07139587402344, 103.22671508789062, 53.70440673828125, 78.95816040039062, 148.2122802734375, 59.991912841796875, -73.71875, -39.5653076171875, 106.07400512695312, 36.248313903808594, -139.5574951171875, 81.32644653320312, 203.18048095703125, -74.06629180908203, -26.485809326171875, 76.8489990234375, -24.665380477905273, 205.64471435546875, 81.32717895507812, 3.0925445556640625, -38.25848388671875, 92.09713745117188, 112.27485656738281, 216.14886474609375, -9.274017333984375, -9.4822998046875, 210.47421264648438, 24.8802490234375, -26.3255615234375, 22.902328491210938, 246.31402587890625, 19.3756103515625, 171.51773071289062, 187.814208984375, 76.13566589355469, 171.9156494140625, 94.98622131347656, 216.5252685546875, -3.79827880859375, 28.239501953125, 341.2801513671875, 67.52974700927734, 23.676101684570312, 102.04501342773438, 71.73110961914062, -26.848915100097656, 110.53129577636719, 2.0305938720703125, 17.02215576171875, -50.741546630859375, 30.322418212890625, 121.79098510742188, 50.5985107421875, 56.7139892578125, -2.28173828125, -7.35443115234375, 159.50189208984375, 104.48226928710938, -25.52423095703125, 38.32622528076172, 31.16131591796875, 108.04263305664062, 188.20404052734375, 3.406951904296875, 222.0648193359375, 56.4576416015625, -8.89276123046875, -144.59225463867188, 98.54534912109375, -49.141510009765625, -32.34185791015625, 66.72296142578125, 113.36672973632812, 80.69354248046875, -46.25032043457031, 64.48907470703125, 126.81613159179688, 77.13642883300781, 131.13369750976562, 14.318500518798828, 77.6990966796875, 3.896148681640625, 62.32562255859375, -34.722808837890625, -16.636474609375, 94.22225952148438, -8.3829345703125, -7.859466552734375, 200.7125244140625, 99.10050964355469, 22.849349975585938, -19.1466121673584, 58.79310607910156, 47.1226806640625, 107.28103637695312, 8.223939895629883, 28.87994384765625, 223.4578857421875, 49.85186767578125, 6.919586181640625, 15.409713745117188, 11.460601806640625, 0.2628154754638672, -36.64263153076172, 3.43896484375, -14.825157165527344, 204.88238525390625, 166.46096801757812, 93.22918701171875, -146.96786499023438, -7.7864227294921875, 112.78173828125, 148.58193969726562, -89.170654296875, 197.613037109375, 30.0516357421875, 85.66104125976562, -33.336578369140625, 175.75494384765625, 9.902950286865234, 249.061279296875, -52.04741668701172, -35.313995361328125, 12.316940307617188, 79.1249008178711], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000257.npy"}
{"epoch": 0.5382198952879581, "step": 258, "batch_size": 128, "mean": 62.26392364501953, "std": 86.22927856445312, "min": -225.8922119140625, "p10": -52.35433654785156, "median": 62.89442443847656, "p90": 159.65452270507814, "max": 264.9088134765625, "pos_frac": 0.7578125, "sample": [198.38095092773438, -6.92939567565918, 62.83796691894531, -17.572998046875, -63.429168701171875, 148.42214965820312, 201.04208374023438, 51.44586181640625, -34.27952575683594, 143.1239013671875, 120.266845703125, 93.75192260742188, 39.03326416015625, 73.86125183105469, 79.28231048583984, 3.268960952758789, 173.22491455078125, -16.40125274658203, 160.22613525390625, 174.01187133789062, 133.43133544921875, 134.47622680664062, -76.6622085571289, 62.95088195800781, 6.164556503295898, -56.32952880859375, 83.06269836425781, 144.96478271484375, 256.50421142578125, 93.012451171875, 128.796142578125, 255.65615844726562, 70.11009216308594, -52.02490234375, 119.43603515625, -19.880340576171875, -2.4486923217773438, -37.0333251953125, -1.3069419860839844, 140.37588500976562, 264.9088134765625, 165.32278442382812, 6.4631195068359375, 27.57183837890625, 124.28262329101562, 37.626953125, -159.49923706054688, 98.31468200683594, 0.200592041015625, 103.51861572265625, 83.63388061523438, 118.447998046875, -81.50601196289062, -10.276214599609375, 53.36937713623047, 46.961639404296875, -26.374679565429688, 12.982154846191406, 143.94442749023438, 133.91348266601562, 87.48206329345703, 37.0897216796875, 78.29350280761719, 139.15870666503906, 58.89930725097656, 254.7845458984375, 32.704559326171875, 88.22360229492188, 175.03579711914062, 40.27558517456055, 5.256599426269531, 129.4114532470703, 160.23806762695312, -6.833984375, -225.8922119140625, 136.1290283203125, 127.52825927734375, -28.601806640625, -81.260986328125, -53.123016357421875, -3.3165969848632812, 19.092025756835938, 27.52867317199707, 101.5770263671875, 204.10577392578125, 20.184600830078125, 100.42390441894531, 87.47885131835938, 131.89682006835938, 49.510589599609375, -4.169391632080078, -44.68428039550781, -105.62557983398438, 127.92465209960938, 81.6153793334961, 78.67047119140625, 145.23834228515625, 38.013275146484375, 143.77301025390625, 59.952674865722656, 59.681884765625, 132.73834228515625, 102.391845703125, 108.8585205078125, 105.15371704101562, -57.85255432128906, 159.4095458984375, 154.14791870117188, -66.56544494628906, 19.356590270996094, 44.574462890625, 52.47381591796875, 137.65570068359375, -69.22674560546875, 119.76776885986328, 65.30035400390625, 136.75491333007812, 56.18287658691406, 86.8037109375, -49.79457092285156, 32.747650146484375, 6.0080718994140625, 16.663925170898438, 32.683349609375, -2.05645751953125, 12.997329711914062, 132.07192993164062, -83.76544189453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000258.npy"}
{"epoch": 0.5403141361256545, "step": 259, "batch_size": 128, "mean": 56.33197021484375, "std": 89.02039337158203, "min": -175.01736450195312, "p10": -43.563801193237296, "median": 52.796688079833984, "p90": 169.91361846923826, "max": 445.74169921875, "pos_frac": 0.671875, "sample": [130.04290771484375, 5.5852508544921875, 124.40213012695312, 46.625057220458984, -39.86456298828125, 130.06854248046875, 114.46160888671875, -8.0089111328125, 47.96954345703125, -78.78990173339844, 74.25714111328125, 79.99705505371094, -8.225273132324219, 78.46255493164062, 118.12493133544922, -5.6483917236328125, 15.730194091796875, -23.739368438720703, 143.00473022460938, -65.87466430664062, 19.116409301757812, -33.566375732421875, 150.6881561279297, 91.173095703125, 61.60899353027344, 101.57177734375, -54.148284912109375, 123.51510620117188, 26.822021484375, 23.991912841796875, 11.68446159362793, -13.375396728515625, -53.87986755371094, 46.85693359375, 94.17854309082031, 177.7461395263672, -36.19384765625, -3.2622146606445312, 45.60040283203125, 58.52081298828125, -3.771453857421875, 147.31826782226562, 79.75750732421875, -41.13899612426758, 185.5775146484375, -66.44921875, 63.35340881347656, -65.25645446777344, -35.5366325378418, 99.8316650390625, 177.87911987304688, 43.8155517578125, 67.98792266845703, 98.27301025390625, -64.00032043457031, 242.1158447265625, 106.7286376953125, 60.93902587890625, 152.9075927734375, -145.71397399902344, 79.12283325195312, 31.633087158203125, 74.58797454833984, -33.418426513671875, 141.75006103515625, -14.44476318359375, 74.260009765625, 129.93740844726562, -49.2216796875, 58.9207763671875, 115.84759521484375, 9.717042922973633, 160.71142578125, 29.76885986328125, -12.584075927734375, 81.24624633789062, 76.79107666015625, 197.49826049804688, 164.27931213378906, 2.2179946899414062, -31.549148559570312, 140.97779846191406, 186.3232421875, -1.227813720703125, -5.8846435546875, 47.50830078125, 151.82992553710938, -24.34124755859375, -1.5282058715820312, 110.0498046875, -175.01736450195312, 13.529541015625, 181.3772430419922, -88.68487548828125, -38.689544677734375, 149.6370849609375, 87.67922973632812, 192.1015625, 29.12989044189453, -8.081298828125, -21.000732421875, -53.76104736328125, 78.47174072265625, 180.778076171875, -95.47555541992188, -34.02867126464844, 10.788002014160156, -3.2619857788085938, 131.7208251953125, 54.131736755371094, -8.41510009765625, 51.73131561279297, 47.1839599609375, 0.0, -6.78955078125, 166.55682373046875, 86.19058227539062, 108.42901611328125, 64.65374755859375, 199.0828094482422, 186.34652709960938, 445.74169921875, 34.361572265625, 53.862060546875, -16.576187133789062, 231.38983154296875, 118.06600952148438, 144.70681762695312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000259.npy"}
{"epoch": 0.5424083769633508, "step": 260, "batch_size": 128, "mean": 27.823530197143555, "std": 91.34517669677734, "min": -194.25311279296875, "p10": -89.12639923095703, "median": 18.506362915039062, "p90": 153.57613220214841, "max": 244.16140747070312, "pos_frac": 0.625, "sample": [129.55096435546875, -53.66796875, 151.6607666015625, -77.47483825683594, 78.85305786132812, 12.655721664428711, 36.14006042480469, 112.88310241699219, 99.25666809082031, 224.83309936523438, -9.45574951171875, 24.258514404296875, -57.85009765625, 70.7381591796875, -15.45721435546875, -67.71539306640625, 11.840415954589844, -88.89125061035156, 95.70843505859375, 135.78619384765625, -26.415634155273438, 81.94688415527344, -25.13446044921875, 47.3673095703125, -130.8800048828125, -62.71812438964844, 242.380859375, 16.312667846679688, 92.59414672851562, -13.428009033203125, -89.67507934570312, 44.3909912109375, -99.95816040039062, 43.810585021972656, -38.00640869140625, 189.90762329101562, -129.9718017578125, -101.45181274414062, -126.86567687988281, -3.2537307739257812, 209.7034454345703, -194.25311279296875, 8.101509094238281, 100.740234375, 175.85052490234375, 158.04531860351562, 6.848155975341797, 11.045135498046875, -5.0771484375, 222.9766845703125, 42.68800354003906, 170.6982421875, 127.54840087890625, 142.6943817138672, 73.052001953125, 20.700057983398438, -21.72998046875, 39.33538818359375, -32.83399963378906, 5.8678131103515625, -125.28958129882812, -116.00112915039062, 8.3402099609375, 8.918212890625, 20.755447387695312, 76.11233520507812, 21.50579833984375, 11.914794921875, 80.80014038085938, 65.311279296875, 61.9896240234375, -62.388427734375, 217.88720703125, -117.40838623046875, 9.631000518798828, 54.09907150268555, 244.16140747070312, -65.42230224609375, 185.13571166992188, 105.08163452148438, -61.67718505859375, 6.703887939453125, 63.59619140625, -19.247283935546875, 53.10418701171875, -102.34100341796875, 2.9639739990234375, 78.84292602539062, -9.975242614746094, -80.63922119140625, -51.00408935546875, 2.82098388671875, 130.25860595703125, -66.47442626953125, 25.72186279296875, 74.97404479980469, 0.0, -28.332305908203125, 33.6031494140625, 54.5089111328125, -130.20262145996094, 72.29161071777344, -50.983943939208984, -53.62002182006836, 70.84576416015625, 92.04383087158203, 73.45755004882812, -108.53424072265625, -68.68814086914062, 13.413944244384766, 170.525634765625, -3.533050537109375, -23.02419662475586, 147.71469116210938, 23.20587158203125, -14.183349609375, 50.15125274658203, 118.05838012695312, -22.259872436523438, 92.31802368164062, -47.8662109375, 203.58453369140625, 61.376434326171875, 29.116668701171875, 129.17788696289062, -83.18656921386719, 38.790557861328125, 2.2734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000260.npy"}
{"epoch": 0.5445026178010471, "step": 261, "batch_size": 128, "mean": 48.62236785888672, "std": 96.99173736572266, "min": -178.2040557861328, "p10": -73.00710906982421, "median": 39.951637268066406, "p90": 190.75618896484374, "max": 290.152099609375, "pos_frac": 0.7109375, "sample": [1.377777099609375, -178.2040557861328, -8.086685180664062, -71.66950988769531, 263.06060791015625, 29.196556091308594, 151.8834228515625, 203.87872314453125, 22.15314483642578, 52.021514892578125, 151.29678344726562, 57.26019287109375, -21.930137634277344, 39.245304107666016, 76.02688598632812, -59.957305908203125, 58.75849914550781, 40.28413391113281, 48.164642333984375, -40.18793487548828, 128.26089477539062, 119.54029846191406, 110.18798828125, -71.46051025390625, 96.671142578125, -13.827621459960938, 11.666946411132812, -116.67572021484375, 47.45573425292969, -129.8278350830078, 64.18463134765625, 3.3638458251953125, 69.76669311523438, 39.619140625, 145.2684326171875, -19.449752807617188, 10.079185485839844, 12.00628662109375, -133.29507446289062, 1.1528778076171875, 170.54954528808594, -143.0352783203125, 140.36434936523438, -31.72601318359375, 111.732421875, 167.1744384765625, 192.06134033203125, 49.861328125, 133.80088806152344, -34.56080627441406, 108.50347900390625, 275.7947998046875, -8.422454833984375, 8.485424041748047, 23.139347076416016, 128.51922607421875, 195.44290161132812, 124.13369750976562, 89.09292602539062, 54.203643798828125, -18.839874267578125, 132.3739013671875, 29.303253173828125, 100.4986343383789, 69.31121826171875, 49.56504821777344, 52.52442169189453, 0.0, 24.747791290283203, 47.808258056640625, 195.09738159179688, -82.89617919921875, 105.19456481933594, 23.751014709472656, -57.35096740722656, 185.22288513183594, 13.202110290527344, -91.78421020507812, 206.61862182617188, 131.80076599121094, 69.15338134765625, 209.47232055664062, 108.64553833007812, 65.68765258789062, 17.900131225585938, 230.910400390625, -19.446182250976562, 192.54766845703125, 246.14315795898438, -89.88525390625, -16.171340942382812, 52.339508056640625, 96.71412658691406, 290.152099609375, 8.700363159179688, -40.21746826171875, -41.331275939941406, -125.41204833984375, 11.391775131225586, 83.65042114257812, 75.48268127441406, 3.5461578369140625, 6.808889389038086, 225.70701599121094, -41.05601501464844, -107.27900695800781, 2.3417816162109375, 149.61045837402344, 190.19683837890625, 42.54719543457031, 33.266387939453125, -55.48939514160156, -76.128173828125, 47.099666595458984, -23.202239990234375, 27.420021057128906, 182.66119384765625, 3.4972000122070312, 154.81695556640625, -31.230247497558594, -77.92141723632812, -25.606475830078125, -117.4971923828125, 161.55337524414062, 9.119184494018555, -27.331161499023438, 68.08863830566406, 11.171958923339844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000261.npy"}
{"epoch": 0.5465968586387434, "step": 262, "batch_size": 128, "mean": 64.7728271484375, "std": 88.76087951660156, "min": -173.52841186523438, "p10": -31.97101745605468, "median": 62.41185760498047, "p90": 193.9468780517578, "max": 276.434326171875, "pos_frac": 0.75, "sample": [181.49984741210938, 91.9384765625, 113.48516845703125, 3.550567626953125, 61.42982482910156, -28.96820068359375, 2.618072509765625, -80.67198181152344, 228.39346313476562, 83.48703002929688, -2.0835418701171875, 94.91804504394531, 39.2606201171875, -77.34991455078125, 113.8389892578125, 217.94171142578125, -27.48247528076172, -0.294189453125, 143.54299926757812, 65.23275756835938, -28.4642333984375, 44.67425537109375, 6.7659149169921875, 159.20034790039062, -95.51162719726562, -54.74150085449219, 0.0, 3.7002944946289062, 276.434326171875, 156.66775512695312, 123.45755004882812, 111.7640380859375, 14.707611083984375, 59.60352325439453, 112.732177734375, 223.19476318359375, -2.01898193359375, 28.78240966796875, -21.98863983154297, 67.64352416992188, 107.06549072265625, -46.6514892578125, -113.80465698242188, 176.33523559570312, 256.67022705078125, 164.82015991210938, -58.77069091796875, 46.57859802246094, 194.13986206054688, 112.8856201171875, 43.350677490234375, -56.921142578125, -29.094928741455078, 3.011688232421875, 64.40496826171875, 90.36154174804688, 228.66973876953125, 34.011749267578125, 72.64370727539062, 63.393890380859375, 257.9610595703125, -29.708236694335938, 202.17547607421875, 108.23994445800781, 32.342437744140625, 94.8106689453125, 189.71502685546875, -20.7509765625, 90.74163818359375, -7.215038299560547, 71.2451171875, -94.88705444335938, 27.8154296875, 183.78616333007812, 24.109649658203125, 44.379974365234375, 24.39129638671875, 95.6251220703125, -49.45880126953125, 31.113525390625, 173.70272827148438, 37.98779296875, 85.76483154296875, 1.732635498046875, -60.28509521484375, 75.22821044921875, 67.40093994140625, 68.550537109375, 23.650165557861328, -6.676666259765625, 194.09408569335938, 73.92193603515625, 95.39697265625, -173.52841186523438, 67.35745239257812, 52.769134521484375, 100.82872772216797, 36.00413513183594, -29.20068359375, 2.5100746154785156, 149.82098388671875, 3.796875, 88.6295166015625, 2.699310302734375, 65.62310791015625, 211.06744384765625, 114.82858276367188, -37.25083923339844, 161.617431640625, -10.0084228515625, 36.781036376953125, 96.81866455078125, 4.1777191162109375, 207.32098388671875, 13.63983154296875, -0.520721435546875, 165.43283081054688, 145.0436553955078, 25.490554809570312, 122.52816772460938, 193.8837890625, -8.8521728515625, 163.90530395507812, 166.03924560546875, 69.82501220703125, 233.886474609375, -1.780029296875, -21.15582275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000262.npy"}
{"epoch": 0.5486910994764398, "step": 263, "batch_size": 128, "mean": 51.579429626464844, "std": 92.6222152709961, "min": -186.4278564453125, "p10": -64.15454559326172, "median": 47.52043151855469, "p90": 167.7667236328125, "max": 296.73193359375, "pos_frac": 0.7109375, "sample": [211.70916748046875, 150.2117919921875, 193.2164306640625, 11.03179931640625, 149.06524658203125, 173.72671508789062, 166.824462890625, 65.49111938476562, -0.7297115325927734, -49.730499267578125, 74.05093383789062, 19.921875, 210.55221557617188, 39.10105895996094, 127.11508178710938, -62.050384521484375, -10.3148193359375, 20.09088134765625, 6.3365478515625, -10.69232177734375, -74.53097534179688, 151.03848266601562, 37.717864990234375, -171.4942626953125, -138.66873168945312, 47.803253173828125, -113.15985107421875, 46.736083984375, 115.14549255371094, 0.0, 62.5865478515625, 163.07769775390625, 21.836711883544922, 31.99591064453125, -30.501800537109375, 39.354278564453125, 152.660400390625, 70.16694641113281, -74.09027099609375, 90.07220458984375, 118.92755126953125, 18.564697265625, 17.838111877441406, 190.17535400390625, -69.06425476074219, 134.71051025390625, 94.12075805664062, 133.89950561523438, -5.015869140625, 63.04362487792969, 79.81446838378906, -6.2346343994140625, 85.09503173828125, -61.189178466796875, 0.0, 36.58934783935547, -16.309310913085938, -9.155895233154297, -101.14683532714844, 155.46417236328125, 87.39222717285156, 138.0571746826172, 135.7698211669922, 36.624000549316406, 111.84830474853516, 136.7933349609375, -0.2547168731689453, 47.95997619628906, 45.23381042480469, 7.133819580078125, 35.259307861328125, 264.0068664550781, -18.080596923828125, 120.1319580078125, 109.70016479492188, 91.94730377197266, -132.18563842773438, 54.84320068359375, 226.26235961914062, 145.0034637451172, -7.676414489746094, 39.665260314941406, -2.0987472534179688, 61.0032958984375, 114.28793334960938, 25.499267578125, 296.73193359375, 30.793930053710938, 187.77081298828125, 12.585433959960938, -19.8251953125, 155.85574340820312, 67.18659973144531, 177.52049255371094, 90.77595520019531, -9.091766357421875, 60.270751953125, 26.8367919921875, -37.1458740234375, 27.37432861328125, 182.50588989257812, 5.628902435302734, 92.20108032226562, -74.17973327636719, -34.708740234375, -96.145751953125, -178.92242431640625, 47.23760986328125, 0.0, 85.10211181640625, -179.21444702148438, 51.88745880126953, 64.64521789550781, 127.4617919921875, 96.9403076171875, -10.276046752929688, 169.96533203125, 80.54884338378906, 148.8621826171875, 72.24604034423828, 13.237485885620117, -186.4278564453125, 68.45681762695312, -60.45697021484375, 217.64547729492188, 23.284683227539062, 70.39801025390625, 89.67642211914062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000263.npy"}
{"epoch": 0.5507853403141362, "step": 264, "batch_size": 128, "mean": 62.913597106933594, "std": 87.84554290771484, "min": -182.75389099121094, "p10": -28.968761444091793, "median": 55.440528869628906, "p90": 179.78280029296874, "max": 368.4318542480469, "pos_frac": 0.734375, "sample": [-1.7372608184814453, 55.586578369140625, 162.61700439453125, 57.569915771484375, 108.63587951660156, 368.4318542480469, 251.73330688476562, 0.0, 138.33267211914062, 83.06495666503906, -57.613929748535156, 14.727149963378906, -90.58380126953125, 56.295684814453125, 19.967987060546875, -5.81109619140625, 32.970916748046875, 27.33026123046875, -32.34423828125, 11.761833190917969, -0.9522666931152344, 44.263671875, 123.77776336669922, 84.04049682617188, 157.27618408203125, -28.334861755371094, 23.06545639038086, 114.23265075683594, 107.81536865234375, 90.80961608886719, 57.365997314453125, 282.6131591796875, 181.44049072265625, 153.83868408203125, 80.62291717529297, 66.47923278808594, 15.626312255859375, 241.40106201171875, -2.6676864624023438, 4.752922058105469, 85.277099609375, 137.00010681152344, -14.779083251953125, 1.9465065002441406, 180.41632080078125, 68.832275390625, 89.32427978515625, -136.69815063476562, 73.7271728515625, 179.51129150390625, 79.61265563964844, 7.9707489013671875, 208.32521057128906, 41.49755859375, -30.447860717773438, -30.802459716796875, -1.3338623046875, 229.629638671875, -12.32257080078125, 0.0, -3.5126953125, 58.873016357421875, 187.00384521484375, -15.912147521972656, -3.5723094940185547, -13.788299560546875, -44.805118560791016, 105.63772583007812, -30.5374755859375, 159.12326049804688, -57.555145263671875, 106.680908203125, -20.363540649414062, 159.674560546875, 147.42831420898438, 35.25677490234375, 135.4774627685547, 164.34100341796875, 7.98808479309082, 28.116905212402344, -69.707275390625, 2.1313438415527344, 25.54479217529297, -24.75567626953125, -11.700897216796875, 117.19879150390625, 12.456298828125, 66.75521850585938, 38.73529052734375, 75.16032409667969, -74.57711791992188, 43.0384521484375, -26.748123168945312, 34.54484558105469, 55.29447937011719, 83.49382019042969, 74.74349975585938, 25.56390380859375, 34.04864501953125, 119.16329956054688, 11.293533325195312, 243.637939453125, -182.75389099121094, 124.07595825195312, 30.8841552734375, 150.50552368164062, -17.22564697265625, -42.74418640136719, 104.31829833984375, 23.332557678222656, 63.292259216308594, 67.31538391113281, -26.663318634033203, 19.788864135742188, 65.9519271850586, 144.32708740234375, 19.6568603515625, 224.26327514648438, 70.82392883300781, 180.9503173828125, 87.78471374511719, 173.02896118164062, 47.3343505859375, 90.2967529296875, 234.104248046875, -20.99138069152832, 145.78372192382812, 159.53994750976562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000264.npy"}
{"epoch": 0.5528795811518324, "step": 265, "batch_size": 128, "mean": 69.47211456298828, "std": 101.353271484375, "min": -171.7916259765625, "p10": -47.80624084472656, "median": 67.67964935302734, "p90": 199.04012451171874, "max": 326.79803466796875, "pos_frac": 0.703125, "sample": [2.942209243774414, 148.97607421875, 293.7847900390625, -12.92938232421875, 180.0517120361328, 103.880615234375, 204.2571563720703, 137.23284912109375, 170.52835083007812, 326.79803466796875, 114.31118774414062, 77.08014678955078, -24.537582397460938, 71.0860824584961, 2.1680908203125, -57.8189697265625, -13.37261962890625, -36.50994873046875, 186.45855712890625, 145.8958740234375, 53.856201171875, 236.326416015625, -3.93060302734375, -68.94656372070312, 148.96173095703125, 53.240081787109375, 68.47737121582031, 13.766159057617188, 123.08697509765625, 7.542106628417969, 25.322265625, 172.58294677734375, 66.88192749023438, 3.6168212890625, 152.21163940429688, 23.640289306640625, 101.28619384765625, 47.956817626953125, 107.477294921875, 178.64239501953125, 229.3759765625, 248.30886840820312, 216.08328247070312, 112.51008605957031, -61.419219970703125, 10.307861328125, 14.24749755859375, 224.19741821289062, 106.05403137207031, -35.921173095703125, -46.477447509765625, 182.5771942138672, -3.94659423828125, 84.84634399414062, 76.39179229736328, 124.81488037109375, 143.44993591308594, 216.68649291992188, 0.0, -1.6821212768554688, 27.39791488647461, 179.26124572753906, 40.20091247558594, 135.2643585205078, 88.48680114746094, -29.400344848632812, 151.2902374267578, 106.96650695800781, -40.209197998046875, 38.148345947265625, -86.065185546875, 122.82501220703125, 194.43206787109375, 39.51490783691406, 194.74371337890625, 164.67076110839844, 159.4237060546875, 2.194366455078125, -38.7078857421875, -49.2642822265625, 255.304931640625, 74.84669494628906, 0.0, -25.468585968017578, -32.9622802734375, -27.235153198242188, -119.03138732910156, 197.25552368164062, -171.7916259765625, 112.91665649414062, 96.9119873046875, -85.35858154296875, 203.20419311523438, -61.98724365234375, 56.685791015625, 45.824859619140625, 144.85562133789062, -16.286376953125, 170.87570190429688, -0.45733642578125, 70.50576782226562, 46.43156433105469, 20.1605224609375, -36.7589111328125, 162.49490356445312, 17.263946533203125, -3.6458816528320312, 105.69029235839844, 123.68109893798828, 129.14083862304688, -47.181365966796875, 160.41891479492188, -125.27407836914062, -129.93988037109375, 229.4263916015625, 267.3810729980469, 127.28262329101562, -86.48959350585938, -12.928878784179688, 49.6201171875, -43.52996826171875, -6.172203063964844, 110.00204467773438, 7.782508850097656, 22.5291690826416, -131.8214111328125, 100.85490417480469, 173.54141235351562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000265.npy"}
{"epoch": 0.5549738219895288, "step": 266, "batch_size": 128, "mean": 66.59730529785156, "std": 86.87285614013672, "min": -188.94610595703125, "p10": -26.30307216644287, "median": 56.960968017578125, "p90": 180.35341644287104, "max": 349.860595703125, "pos_frac": 0.796875, "sample": [56.31719970703125, 57.699127197265625, 206.30136108398438, 129.25064086914062, 47.4097900390625, 73.14703369140625, 6.920196533203125, 225.4317626953125, -49.6650390625, 18.178897857666016, 113.2215576171875, -188.94610595703125, 14.195228576660156, 37.65167236328125, 62.745025634765625, 114.85163879394531, 35.96327209472656, 213.1788330078125, 146.51919555664062, 117.93783569335938, 56.738372802734375, 168.50631713867188, 51.74658203125, -16.241363525390625, -168.38778686523438, 71.28427124023438, 84.51150512695312, 223.1951446533203, 262.996337890625, 11.96197509765625, 106.24136352539062, 26.21466064453125, 0.2696533203125, -58.62579345703125, 92.38130187988281, -25.711883544921875, -86.291015625, 22.14562225341797, 45.63105773925781, 139.98214721679688, 175.32188415527344, 7.655975341796875, 60.728179931640625, 55.8929443359375, -95.95658874511719, -78.3140869140625, 19.875415802001953, 3.564056396484375, -29.84735107421875, 54.798248291015625, 161.5618896484375, 223.7767333984375, -17.56121826171875, 87.20391845703125, 71.30029296875, 136.48846435546875, 70.01907348632812, 148.04537963867188, 60.396759033203125, 85.19302368164062, 173.75537109375, 17.44298553466797, 17.933948516845703, 95.69692993164062, -58.62148666381836, -1.6778488159179688, -2.037628173828125, 59.80828857421875, 45.398956298828125, 19.8922119140625, 12.379104614257812, 72.30589294433594, 101.91278076171875, 47.719696044921875, -21.701263427734375, 163.88726806640625, -57.27477264404297, 55.18208312988281, 43.46063232421875, 86.92213439941406, 92.79681396484375, 84.67825317382812, 110.28433990478516, 204.32667541503906, 23.94963264465332, -5.6512298583984375, -2.8313941955566406, -15.6690673828125, 234.97036743164062, 57.8746337890625, 98.30490112304688, 41.867431640625, 170.4581298828125, -17.72943115234375, 27.052688598632812, 52.333824157714844, 12.718399047851562, 39.3115234375, 61.86195373535156, -7.70623779296875, 134.31997680664062, 57.183563232421875, 76.96133422851562, 268.7210388183594, 349.860595703125, 134.90719604492188, 82.0878677368164, 230.53146362304688, 134.16253662109375, 106.14131164550781, 34.93186950683594, 27.391815185546875, 52.74212646484375, -18.2286376953125, -29.180389404296875, -27.682512283325195, 46.531524658203125, -10.563232421875, 141.67691040039062, 131.4132080078125, -28.441892623901367, 33.94537353515625, 82.01556396484375, 261.5559997558594, 122.93208312988281, 79.08578491210938, 74.7708740234375, 192.09365844726562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000266.npy"}
{"epoch": 0.5570680628272251, "step": 267, "batch_size": 128, "mean": 52.84173583984375, "std": 89.05720520019531, "min": -192.72320556640625, "p10": -45.325903701782224, "median": 48.046051025390625, "p90": 156.39752349853515, "max": 348.86785888671875, "pos_frac": 0.75, "sample": [2.065439224243164, 73.41897583007812, 129.75299072265625, 82.3138427734375, 69.21746826171875, -36.970733642578125, 24.790481567382812, 63.65577697753906, 27.54314422607422, 145.56329345703125, 86.6522216796875, 81.80230712890625, 160.14027404785156, 129.67811584472656, -45.12520980834961, -88.9349365234375, 141.2711944580078, 8.735580444335938, 75.61013793945312, -113.97584533691406, 96.1058349609375, 112.2026596069336, 174.64205932617188, -14.658294677734375, -70.03773498535156, -6.626220703125, 13.369293212890625, 120.18511962890625, 51.685211181640625, 26.639022827148438, -1.3656005859375, 9.762214660644531, 348.86785888671875, 102.25697326660156, 24.27092742919922, 95.11952209472656, -13.367267608642578, 190.17071533203125, 152.11557006835938, 102.39808654785156, -43.58439636230469, 229.50360107421875, 52.51409912109375, 25.67913818359375, 73.10968017578125, 86.46133422851562, 28.684112548828125, 259.68695068359375, 118.44924926757812, 38.196136474609375, 36.915863037109375, 111.21527099609375, 103.75435638427734, 76.59152221679688, -36.83135986328125, 25.17279052734375, 35.8341064453125, 111.65750122070312, -89.21841430664062, 154.79348754882812, 162.43365478515625, 123.1802749633789, -45.794189453125, -42.004364013671875, 45.3277587890625, -20.637664794921875, -10.241790771484375, 68.2999267578125, 13.380531311035156, 183.4912109375, 30.617202758789062, 104.10171508789062, 17.964889526367188, -56.665802001953125, 67.59078979492188, 103.66128540039062, 104.43794250488281, 41.718719482421875, 146.31338500976562, 79.74723815917969, 141.47323608398438, -142.71026611328125, -33.845977783203125, 127.05221557617188, 6.972389221191406, 42.399688720703125, 140.95840454101562, 69.77391052246094, 16.556396484375, 73.2430419921875, 11.875564575195312, -28.523712158203125, 137.82501220703125, 71.79403686523438, 10.49267578125, 4.1934356689453125, -40.38958740234375, 152.0897979736328, -5.852434158325195, 8.91668701171875, 11.1357421875, 280.7083740234375, 54.1314697265625, 34.916473388671875, 112.5863037109375, -192.72320556640625, 50.76434326171875, 8.549514770507812, 188.9388885498047, 82.36486053466797, -168.15191650390625, 16.5855712890625, 93.5025634765625, 199.4859619140625, -13.079158782958984, 6.030294418334961, -3.1381759643554688, 238.926025390625, -76.76253509521484, -66.49287414550781, 86.09281921386719, -42.939483642578125, -11.52630615234375, 170.81170654296875, 71.37676239013672, -130.73046875, -64.97369384765625, 16.609619140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000267.npy"}
{"epoch": 0.5591623036649215, "step": 268, "batch_size": 128, "mean": 66.83665466308594, "std": 91.77606201171875, "min": -153.86537170410156, "p10": -27.121328735351558, "median": 59.376731872558594, "p90": 199.73980712890622, "max": 305.4971923828125, "pos_frac": 0.7734375, "sample": [99.58815002441406, 19.207908630371094, 19.30624771118164, 107.9090576171875, 138.86383056640625, 71.83909606933594, -46.57347106933594, -8.493576049804688, 305.4971923828125, -6.565361022949219, 2.836944580078125, 121.480224609375, 43.160308837890625, 118.95491790771484, 156.34078979492188, 62.604034423828125, -8.763397216796875, 121.55911254882812, 116.01771545410156, 125.08171081542969, 46.69732666015625, -5.8857879638671875, 87.63983154296875, 44.193359375, 8.597488403320312, 122.35455322265625, 132.93792724609375, -7.360538482666016, 126.12271118164062, 44.649932861328125, 16.32763671875, 65.91136169433594, 17.301849365234375, -25.82440185546875, 17.05874252319336, -22.564605712890625, -17.848670959472656, -110.43301391601562, 39.54034423828125, 221.9837646484375, 164.8533935546875, 119.6004638671875, 117.14273071289062, -153.86537170410156, 89.13259887695312, 240.48687744140625, -9.797088623046875, 28.76763916015625, -52.61616516113281, -18.96026611328125, 205.51895141601562, 124.26524353027344, 3.8193817138671875, 11.530662536621094, 20.95428466796875, 12.833969116210938, 40.6695556640625, 62.80494689941406, -88.36332702636719, 182.74642944335938, -124.27278137207031, -10.0810546875, 284.0555419921875, 49.04600524902344, -7.138519287109375, -63.641090393066406, -21.86682891845703, 242.71322631835938, 105.69113159179688, 78.21257019042969, 86.9720458984375, 110.50921630859375, 269.82818603515625, 82.18597412109375, 3.778627395629883, 75.68975830078125, 240.99400329589844, -39.36846923828125, 54.46221923828125, -145.29476928710938, 233.53082275390625, 112.9317626953125, 85.47332763671875, -30.147491455078125, 12.640861511230469, 230.9762725830078, -0.003204345703125, 21.502975463867188, 80.98574829101562, 68.39898681640625, 139.9832763671875, 162.2890625, -20.471446990966797, -84.34796142578125, 56.14942932128906, 163.80552673339844, 93.11471557617188, 85.36734008789062, 20.09051513671875, 55.856048583984375, 70.19610595703125, 17.487640380859375, 171.49159240722656, -107.2403564453125, 77.50698852539062, 67.45592498779297, 25.518775939941406, 135.458740234375, 210.24612426757812, 101.96600341796875, 179.96279907226562, -43.163330078125, 71.28228759765625, 186.09512329101562, 194.46209716796875, 51.69122314453125, 6.161895751953125, 0.13014984130859375, 216.76040649414062, 6.566490173339844, 229.69845581054688, 79.63409423828125, 12.7628173828125, 123.2520751953125, 27.99530029296875, 197.26303100585938, 21.0709228515625, 0.0], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000268.npy"}
{"epoch": 0.5612565445026177, "step": 269, "batch_size": 128, "mean": 47.861759185791016, "std": 93.84581756591797, "min": -274.0832824707031, "p10": -56.68866958618164, "median": 46.59385681152344, "p90": 161.74274597167968, "max": 265.2330017089844, "pos_frac": 0.671875, "sample": [-147.70516967773438, 108.69915771484375, 120.97805786132812, -97.84979248046875, 95.77557373046875, -85.4834976196289, 117.68060302734375, 0.0, 30.075653076171875, 7.4637451171875, 202.21868896484375, 63.46074676513672, -54.70094680786133, 51.32379150390625, 141.598876953125, -22.728912353515625, 139.07540893554688, 16.844757080078125, -12.12176513671875, -3.3865203857421875, -96.87680053710938, 132.4451141357422, -4.7366943359375, 59.136016845703125, 5.3328857421875, -28.1441650390625, 174.787841796875, 34.45644760131836, 58.284820556640625, 29.057266235351562, 63.17854309082031, -48.03746795654297, 115.234375, 2.713897705078125, -47.20947265625, 116.32579040527344, 113.4398193359375, -108.76536560058594, 24.60999298095703, 156.96560668945312, 213.80416870117188, 144.81826782226562, 40.06523132324219, 131.51763916015625, 213.75912475585938, -166.10504150390625, -47.518890380859375, 194.86392211914062, 132.2064208984375, -1.715118408203125, 62.952301025390625, 15.5018310546875, 103.1607666015625, 116.44567108154297, -4.5930633544921875, -47.224517822265625, 122.35826110839844, 49.447998046875, 61.473052978515625, 65.8887939453125, -118.3638916015625, -32.167823791503906, 50.67645263671875, -37.541664123535156, 43.739715576171875, -22.8421630859375, 80.71914672851562, -4.7691650390625, -57.58733367919922, 37.100616455078125, 217.9322509765625, 193.3643341064453, 104.84356689453125, -33.629150390625, 123.44320678710938, 106.2185287475586, 165.18466186523438, 72.14717864990234, -85.06930541992188, -22.065441131591797, 0.0, 265.2330017089844, 28.466323852539062, 71.99774169921875, -3.0750770568847656, 27.005752563476562, -110.96856689453125, 147.61819458007812, 71.30929565429688, 99.66276550292969, 38.631561279296875, 110.59918212890625, 111.41400146484375, 199.4598846435547, 57.687164306640625, -274.0832824707031, 26.389984130859375, 157.5690460205078, -5.4091796875, 142.63235473632812, 34.8604736328125, -32.433494567871094, 123.5010986328125, -0.03748512268066406, 18.3433837890625, 96.5296630859375, 24.270919799804688, 203.15188598632812, 151.58029174804688, 49.45378112792969, 133.27340698242188, 34.19207763671875, -183.70086669921875, -40.23465347290039, 126.29971313476562, 143.45651245117188, -56.30352783203125, 176.49281311035156, -34.92388916015625, 96.424560546875, 14.242523193359375, -64.770751953125, 63.759483337402344, -39.25518798828125, -51.4384765625, 29.850357055664062, 181.45530700683594, 160.26763916015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000269.npy"}
{"epoch": 0.5633507853403141, "step": 270, "batch_size": 128, "mean": 60.618648529052734, "std": 101.7587890625, "min": -212.73175048828125, "p10": -68.15792236328124, "median": 59.25208282470703, "p90": 179.94393005371091, "max": 375.61285400390625, "pos_frac": 0.734375, "sample": [-150.6422119140625, 91.3311767578125, 144.33447265625, 22.81275177001953, 72.54150390625, 9.705432891845703, 154.46878051757812, 35.01373291015625, -15.752532958984375, 216.84548950195312, 17.16808319091797, -68.17706298828125, 100.21735382080078, -82.87356567382812, -92.53399658203125, 204.518798828125, 225.20263671875, 108.04354858398438, 222.33413696289062, 113.75300598144531, 178.86489868164062, 119.96794128417969, 111.2664794921875, -10.42413330078125, 10.386383056640625, 20.131454467773438, 14.365493774414062, 74.52761840820312, 58.64332580566406, 117.6973876953125, 159.982666015625, 174.58294677734375, -73.35083770751953, 280.8927307128906, -70.49949645996094, 83.51416015625, -116.75576782226562, 49.84587097167969, -161.13565063476562, 167.38389587402344, -51.645904541015625, -1.8979415893554688, 50.110626220703125, 77.24385070800781, 111.46481323242188, 173.45664978027344, 10.096099853515625, -12.494369506835938, 183.985595703125, -45.20759582519531, 62.677032470703125, 81.03704833984375, 109.18368530273438, 138.27825927734375, -0.4588623046875, 105.37763977050781, 4.101676940917969, 43.54682922363281, 66.7430419921875, 28.856124877929688, 184.639404296875, 83.67691040039062, 7.766326904296875, 300.3701171875, 99.99459838867188, -100.85348510742188, 160.730224609375, 139.365966796875, -28.9644775390625, 126.06329345703125, 63.50190734863281, 10.618316650390625, -19.48516845703125, -3.837646484375, 149.69149780273438, 108.65008544921875, 112.16717529296875, -68.14971923828125, 61.318939208984375, 10.237411499023438, -168.80435180664062, 107.99596405029297, -3.388885498046875, 246.36773681640625, 2.4559173583984375, -212.73175048828125, 34.23480224609375, -12.3944091796875, 18.475929260253906, -29.387123107910156, -34.0062255859375, -10.952606201171875, -39.7117919921875, 375.61285400390625, 148.18246459960938, 174.8183135986328, 53.32708740234375, 37.58964538574219, 88.36732482910156, 91.70223999023438, 154.4544677734375, 57.09063720703125, -16.156742095947266, 276.7796630859375, -30.69891357421875, 15.82783317565918, 19.81890869140625, 150.1395721435547, 12.12200927734375, 0.221466064453125, 13.795654296875, 57.06133270263672, 103.2867431640625, -1.3780269622802734, 148.3232421875, -142.1732177734375, 115.81271362304688, 59.86083984375, 11.054412841796875, 176.093505859375, 182.461669921875, 237.43707275390625, -88.59068298339844, 126.45198059082031, 83.01066589355469, 105.94061279296875, 80.03662109375, -66.8048095703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000270.npy"}
{"epoch": 0.5654450261780105, "step": 271, "batch_size": 128, "mean": 52.6430778503418, "std": 92.9485092163086, "min": -162.103271484375, "p10": -55.31815185546875, "median": 37.732948303222656, "p90": 182.55425720214842, "max": 306.02545166015625, "pos_frac": 0.71875, "sample": [35.80329895019531, -69.87080383300781, -5.0147705078125, 36.0811767578125, 2.70123291015625, 100.60784912109375, 77.3044204711914, 11.136444091796875, 86.0838623046875, 73.01065063476562, 39.38471984863281, -58.197662353515625, 53.64085388183594, 70.12286376953125, 30.672409057617188, 136.63748168945312, 56.302406311035156, -80.55557250976562, -38.3587646484375, 259.3455810546875, 25.15119171142578, 162.2158660888672, 42.18452453613281, 3.386688232421875, -30.35186767578125, 39.834442138671875, 76.96475219726562, -127.61885070800781, 131.11953735351562, -59.43769836425781, -13.334503173828125, 15.053726196289062, 18.5689697265625, -16.567459106445312, 31.01787567138672, -24.62676239013672, 88.939697265625, 25.189926147460938, 4.96240234375, -78.6083984375, 23.895193099975586, 2.7377471923828125, 194.95950317382812, 111.42498779296875, 125.87220764160156, 112.42701721191406, 306.02545166015625, 253.27450561523438, 65.02053833007812, 15.147735595703125, 171.52516174316406, 185.365478515625, 46.33935546875, -24.840286254882812, -159.48568725585938, 159.83151245117188, 252.053466796875, 47.351226806640625, -89.59546661376953, -15.692413330078125, 215.82318115234375, 181.82977294921875, -131.1813201904297, -40.0185546875, 53.70263671875, 7.550682067871094, 72.72018432617188, 35.887725830078125, 238.44570922851562, 61.967620849609375, -23.566558837890625, 166.61431884765625, 8.220863342285156, 11.191726684570312, -162.103271484375, 104.75091552734375, -6.3721923828125, 178.51211547851562, 14.85052490234375, 172.9288330078125, -48.772674560546875, 84.4208755493164, -5.790557861328125, 46.79937744140625, 184.24472045898438, 27.5194091796875, -80.97390747070312, 53.4964599609375, 22.807533264160156, -54.084075927734375, -9.165138244628906, 208.26498413085938, 32.20070266723633, -35.17859649658203, 259.89947509765625, 115.79086303710938, 130.658935546875, 113.95079040527344, -76.18292236328125, 126.28353881835938, 143.79205322265625, -17.512969970703125, 71.3480224609375, -1.0207366943359375, 33.561920166015625, 26.773773193359375, 199.26431274414062, 170.05625915527344, 90.66954040527344, -8.197113037109375, -3.71990966796875, 282.3991394042969, -96.06900024414062, 50.879669189453125, 49.4404296875, 19.020675659179688, 39.77996826171875, 68.0806884765625, 55.1551513671875, 78.69497680664062, 146.66864013671875, -32.8631591796875, -3.5043182373046875, 111.38882446289062, 12.55251693725586, 56.79730224609375, 33.24922180175781, -10.83575439453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000271.npy"}
{"epoch": 0.5675392670157068, "step": 272, "batch_size": 128, "mean": 60.899871826171875, "std": 103.53997802734375, "min": -231.2159423828125, "p10": -46.42632751464843, "median": 53.25999450683594, "p90": 187.13352050781248, "max": 385.1060791015625, "pos_frac": 0.6875, "sample": [-14.578689575195312, 0.0, 40.82879638671875, 68.38441467285156, 104.126953125, 1.9370498657226562, 385.1060791015625, 66.46917724609375, 195.52203369140625, 73.16317749023438, 71.05911254882812, 43.505516052246094, 140.7308349609375, -0.5197963714599609, 236.66172790527344, 182.58526611328125, 45.37727355957031, -1.7905454635620117, 242.59423828125, 115.56698608398438, 34.909637451171875, 177.4052276611328, -23.151611328125, 10.645730972290039, 155.54522705078125, 123.78436279296875, 25.35540008544922, 185.96923828125, -131.0919189453125, -9.586410522460938, 115.40194702148438, -61.20306396484375, -223.19712829589844, 99.08834838867188, 319.44818115234375, -15.41510009765625, 202.35089111328125, 113.711181640625, 65.5362548828125, -11.380317687988281, 247.67330932617188, 21.84036636352539, -0.12091064453125, -66.13232421875, -22.00644302368164, 46.96171569824219, 148.7635955810547, 136.51748657226562, 94.84860229492188, 53.9852294921875, 233.80795288085938, -74.24247741699219, 0.0, -34.7081298828125, 106.87974548339844, -42.920562744140625, 3.54998779296875, -188.38031005859375, 19.276565551757812, 80.80062866210938, 68.33290100097656, 105.26270294189453, -4.360982894897461, 4.3731689453125, 124.05770874023438, -31.105636596679688, -2.8167266845703125, 10.98961067199707, 94.16822052001953, -113.2418212890625, -1.86993408203125, 186.15029907226562, 127.55924987792969, -27.12493133544922, 22.163097381591797, 201.44064331054688, -73.29948425292969, 84.69711303710938, 118.943359375, 17.758014678955078, 73.94186401367188, 108.70155334472656, -231.2159423828125, -58.20147705078125, -16.001556396484375, -8.63018798828125, 122.02743530273438, -8.449943542480469, 64.25579833984375, 13.165420532226562, -18.693191528320312, 103.96981048583984, 189.42770385742188, 108.17745971679688, 109.78779602050781, 14.26593017578125, 154.66677856445312, 177.0782470703125, 75.4266357421875, 232.56195068359375, 67.02603149414062, 51.64598083496094, -1.7798805236816406, -54.6064453125, -0.47183990478515625, 19.715179443359375, 137.55618286132812, 343.3460998535156, -8.639463424682617, 49.1846923828125, 42.289703369140625, -27.402374267578125, 44.971435546875, 52.534759521484375, -168.5177001953125, 160.5, 79.50281524658203, 81.99276733398438, 126.15299224853516, 216.49493408203125, 49.828857421875, -97.80624389648438, -36.655311584472656, 107.6102294921875, 77.727294921875, 184.68902587890625, 157.45681762695312, 105.24839782714844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000272.npy"}
{"epoch": 0.5696335078534032, "step": 273, "batch_size": 128, "mean": 62.60122299194336, "std": 96.87471008300781, "min": -161.5850830078125, "p10": -47.058933639526366, "median": 58.1193962097168, "p90": 176.70122985839842, "max": 321.2838134765625, "pos_frac": 0.734375, "sample": [79.9117431640625, 163.35382080078125, 107.967041015625, 134.9386444091797, -25.606353759765625, 206.8994598388672, -25.679161071777344, -46.332763671875, 76.34100341796875, 162.49838256835938, 15.58203125, 34.01104736328125, -8.172813415527344, 23.1845703125, 102.424560546875, -54.5123291015625, -19.780029296875, 126.80889892578125, 128.97671508789062, -17.39722442626953, -77.734375, 106.28184509277344, 162.05377197265625, 12.398590087890625, -17.727294921875, 151.51644897460938, 96.57169342041016, -48.75333023071289, 4.8535003662109375, 311.02130126953125, 43.2318115234375, 131.2567138671875, 194.7302703857422, -16.61115264892578, 321.2838134765625, 51.48974609375, 19.0335693359375, -79.925537109375, -26.2706298828125, 73.63287353515625, -35.66133117675781, 13.942850112915039, 175.47320556640625, 21.953140258789062, 253.50247192382812, 28.17218017578125, 101.69633483886719, 6.700531005859375, 122.03852844238281, 7.2005615234375, 219.67962646484375, 42.202789306640625, 296.2886047363281, 104.00225830078125, 175.32659912109375, 150.55172729492188, 83.2884521484375, 78.59819793701172, 84.25064086914062, 60.766014099121094, 10.72821044921875, 127.69448852539062, 86.2225341796875, -15.87030029296875, 84.01214599609375, 96.13152313232422, -45.153045654296875, 5.227294921875, -59.18091583251953, -39.1383056640625, -25.67035675048828, 74.3812255859375, 156.97259521484375, 36.8121337890625, -124.1181640625, 87.85031127929688, -17.744110107421875, 55.4727783203125, 86.71644592285156, 106.28982543945312, -23.575347900390625, 12.877029418945312, 146.8482666015625, 221.93310546875, 170.67510986328125, -9.551593780517578, -23.9482421875, 36.324462890625, 238.32577514648438, 83.20072174072266, 77.02642822265625, -35.936805725097656, 22.259389877319336, 175.99505615234375, -16.711257934570312, 178.34896850585938, -127.36589050292969, 51.09212875366211, 224.69439697265625, -154.77197265625, 80.22904968261719, 49.32794189453125, 54.390602111816406, -10.717397689819336, 150.46942138671875, 14.959976196289062, 13.412765502929688, -72.27682495117188, -127.91961669921875, 194.2098388671875, 78.6598892211914, 154.68226623535156, 5.9381256103515625, 160.0699462890625, 62.82466125488281, 110.1929931640625, 157.95013427734375, 0.0081634521484375, 80.39402770996094, 39.70086669921875, 17.136566162109375, 155.07666015625, -161.5850830078125, 131.15863037109375, 207.4742431640625, -53.4976806640625, 156.49896240234375, -138.91290283203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000273.npy"}
{"epoch": 0.5717277486910994, "step": 274, "batch_size": 128, "mean": 74.11883544921875, "std": 91.95629119873047, "min": -167.52462768554688, "p10": -34.15042419433593, "median": 60.35041809082031, "p90": 201.51976318359374, "max": 281.4407958984375, "pos_frac": 0.8125, "sample": [12.947940826416016, 156.98455810546875, -86.74349975585938, 46.65116882324219, 174.97767639160156, 130.54812622070312, 142.61984252929688, 141.09710693359375, 263.1595458984375, 266.520751953125, -48.112091064453125, -15.063758850097656, 26.9405517578125, 179.30621337890625, 0.06299591064453125, 74.96456909179688, -153.4093017578125, 26.37708282470703, 17.281082153320312, -2.488534927368164, 58.788482666015625, 201.4017333984375, 203.3368682861328, 46.146697998046875, 229.317626953125, 123.53622436523438, 47.92706298828125, 201.795166015625, -4.16046142578125, 71.88284301757812, 61.912353515625, 13.536834716796875, 14.007709503173828, 75.16455078125, 120.57537841796875, 130.96751403808594, -7.92034912109375, 27.632354736328125, 9.9254150390625, 152.86383056640625, 31.943695068359375, 195.45965576171875, 236.47897338867188, 209.92431640625, 108.98133850097656, -60.94700622558594, -40.85694885253906, 16.359130859375, 281.4407958984375, 204.29519653320312, 29.201019287109375, -167.52462768554688, 1.430755615234375, 15.88946533203125, 93.22921752929688, -22.5517578125, -38.844635009765625, 154.0919189453125, 204.7152099609375, 146.96478271484375, 88.94940185546875, 4.74517822265625, -36.258392333984375, 86.04470825195312, 190.00662231445312, 3.675506591796875, 109.6578369140625, 143.63394165039062, 128.32177734375, 6.577409744262695, -33.24700927734375, -14.630104064941406, 88.70979309082031, 20.2935791015625, -66.54835510253906, -4.982177734375, 34.99609375, 37.365966796875, 1.1030693054199219, 158.69244384765625, 170.62078857421875, 113.30343627929688, 93.119140625, 76.97100830078125, -87.962646484375, 30.897003173828125, 40.04376220703125, 176.108154296875, 155.01803588867188, 189.66403198242188, 141.92684936523438, 39.15960693359375, 5.9985198974609375, -138.71368408203125, 98.56216430664062, 4.224092483520508, 258.09991455078125, 45.727264404296875, -1.4621810913085938, 112.36506652832031, -22.96917724609375, 106.22042846679688, 148.65292358398438, -15.275405883789062, 10.064933776855469, 99.22038269042969, 33.37139892578125, 137.99392700195312, 13.57449722290039, 29.917842864990234, 33.41716003417969, 44.14805603027344, -37.08819580078125, 126.34466552734375, 240.41091918945312, 151.10711669921875, 44.535247802734375, 113.063720703125, 51.86553955078125, 150.5719451904297, -90.53327941894531, 114.26962280273438, 124.20256042480469, 141.90737915039062, 50.07130432128906, 157.47610473632812, 111.8968505859375, 215.05438232421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000274.npy"}
{"epoch": 0.5738219895287958, "step": 275, "batch_size": 128, "mean": 75.52490234375, "std": 93.48363494873047, "min": -134.12448120117188, "p10": -28.574340057373043, "median": 58.94781494140625, "p90": 201.37163543701172, "max": 316.3930969238281, "pos_frac": 0.7734375, "sample": [208.56097412109375, -14.73046875, -14.617938995361328, -27.562744140625, 56.563255310058594, 132.52276611328125, 211.27951049804688, 141.5223388671875, 97.69692993164062, -6.6200408935546875, 3.9178524017333984, 15.667984008789062, 192.4208526611328, -51.65034484863281, 152.87899780273438, -134.12448120117188, 15.050460815429688, 151.82467651367188, 225.1644744873047, 77.02316284179688, -105.51922607421875, -31.020370483398438, 246.6275634765625, -41.682373046875, 135.03115844726562, 175.52365112304688, 58.836944580078125, 166.60287475585938, 163.404052734375, 30.4166259765625, 75.20620727539062, 6.951801300048828, 143.75350952148438, 23.405731201171875, 316.3930969238281, 91.6424560546875, 17.539262771606445, 10.446823120117188, -76.71292114257812, 79.90129852294922, 0.0, 0.0, 144.42449951171875, 145.55563354492188, 3.1957931518554688, -0.453399658203125, 28.02783966064453, 44.63568115234375, -41.51039123535156, 0.0, -12.534828186035156, 168.44046020507812, 26.717269897460938, 117.79278564453125, 84.57952880859375, 25.942306518554688, 93.3423080444336, 3.8711071014404297, 59.058685302734375, 202.52012634277344, 63.1973876953125, 103.03890991210938, 199.71340942382812, 192.32308959960938, 77.74044799804688, 1.54705810546875, 79.60629272460938, 316.1728210449219, 272.65069580078125, 34.58184814453125, 168.32754516601562, 78.58088684082031, -50.5206298828125, 0.11614990234375, 145.40167236328125, 134.21368408203125, 49.42921447753906, 11.26708984375, 252.17031860351562, 0.0, 36.82890319824219, 150.3994140625, -30.934730529785156, 92.18438720703125, 253.0772705078125, 161.35147094726562, 2.1568603515625, 111.49261474609375, -8.4212646484375, 105.00430297851562, -25.6934814453125, 78.76766967773438, 4.076995849609375, 195.28076171875, 35.181182861328125, -35.81451416015625, -19.4688720703125, 200.87942504882812, 0.0, 36.24452209472656, 9.36859130859375, -5.383785247802734, -9.691497802734375, 176.35809326171875, 16.278564453125, 60.564605712890625, 196.72732543945312, 20.12311553955078, 44.432373046875, 4.95013427734375, 92.76327514648438, 275.5153503417969, 209.38897705078125, 195.4295654296875, 83.87515258789062, 12.1624755859375, 7.50823974609375, -73.9600830078125, 140.90606689453125, -42.67247009277344, 20.902435302734375, 82.74151611328125, 237.26864624023438, 22.18304443359375, 143.77963256835938, -38.1414794921875, 150.66162109375, 147.85635375976562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000275.npy"}
{"epoch": 0.5759162303664922, "step": 276, "batch_size": 128, "mean": 67.15556335449219, "std": 95.9648666381836, "min": -264.5528564453125, "p10": -37.154537963867185, "median": 53.41423034667969, "p90": 199.707421875, "max": 305.64398193359375, "pos_frac": 0.78125, "sample": [305.64398193359375, 38.42963409423828, 140.60000610351562, -10.386024475097656, 240.004638671875, 98.18801879882812, 37.04595947265625, 51.77130126953125, 121.173095703125, 55.057159423828125, -45.53216552734375, 192.85678100585938, 91.22021484375, 51.198333740234375, 13.789466857910156, 164.81124877929688, 3.1730575561523438, 15.209075927734375, 65.5203857421875, 198.75247192382812, -36.437835693359375, 236.39892578125, 22.577377319335938, 255.7022705078125, 73.56295776367188, 201.93563842773438, 103.20346069335938, 160.15188598632812, 44.06108093261719, 122.4573974609375, 123.65020751953125, -106.011474609375, 115.3946533203125, 23.3798828125, 51.1009521484375, 27.488037109375, 68.5150146484375, 0.4775066375732422, 75.11630249023438, 69.69509887695312, 171.8585205078125, -7.3361358642578125, 3.933399200439453, 187.37527465820312, 125.20489501953125, 36.09259033203125, 192.22149658203125, 17.137649536132812, -20.055992126464844, 58.8258056640625, -22.047054290771484, 4.6141357421875, 163.4171142578125, 97.33589172363281, 170.421142578125, 22.7872314453125, 18.326080322265625, 50.131378173828125, -264.5528564453125, 106.66876983642578, 93.1417236328125, 24.727447509765625, 109.680419921875, -89.7379150390625, 81.32550048828125, 15.57996940612793, 173.448974609375, -32.177093505859375, 44.19287109375, 27.915496826171875, 156.50827026367188, 25.099822998046875, 129.77484130859375, -83.55548095703125, -173.07373046875, -41.3870849609375, 9.554428100585938, 85.80487060546875, 141.5860595703125, -38.82684326171875, 73.51303100585938, 223.49884033203125, 137.03445434570312, 203.78814697265625, -48.7919921875, 124.09490966796875, -20.944847106933594, -59.8768310546875, -2.233642578125, 55.17474365234375, 3.761066436767578, 233.20150756835938, 156.4781494140625, 11.61798095703125, 236.92355346679688, -130.26483154296875, -13.933135986328125, -8.722984313964844, -93.6688232421875, 49.535400390625, 28.878616333007812, 245.62319946289062, 100.6417236328125, 220.62030029296875, -27.72222900390625, 109.02072143554688, 63.3917236328125, 1.42266845703125, 0.0, -63.441375732421875, 63.30763244628906, -10.652778625488281, 178.60110473632812, -18.255645751953125, 222.03854370117188, 21.047866821289062, 34.21258544921875, 165.26119995117188, -0.28002166748046875, 120.23428344726562, 162.5760498046875, 28.959014892578125, 38.74322509765625, 62.21746826171875, 14.297119140625, 223.79620361328125, 189.82455444335938, 58.47517395019531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000276.npy"}
{"epoch": 0.5780104712041885, "step": 277, "batch_size": 128, "mean": 52.71246337890625, "std": 95.11674499511719, "min": -231.84933471679688, "p10": -48.808330535888665, "median": 55.27051544189453, "p90": 158.49400634765624, "max": 318.6260986328125, "pos_frac": 0.6875, "sample": [101.68156433105469, -24.68017578125, 12.097110748291016, 140.7591552734375, -12.06051254272461, -99.09249114990234, -9.409156799316406, 29.509811401367188, 27.847368240356445, -156.3204345703125, 108.94412231445312, 59.15217590332031, 102.26226806640625, 73.40411376953125, 128.8944091796875, -16.680604934692383, -16.670074462890625, 14.74749755859375, 112.6928482055664, -53.83122253417969, 17.7939453125, 146.63290405273438, 69.2061767578125, 243.50250244140625, 29.55731201171875, 75.51667785644531, -39.72334289550781, -37.644073486328125, 83.1333236694336, 8.99468994140625, -66.66261291503906, 17.43419647216797, 318.6260986328125, -85.91534423828125, -39.498809814453125, 9.747955322265625, 128.50804138183594, 24.27056121826172, 140.44210815429688, 76.61151123046875, 45.36009216308594, 51.17047119140625, 94.65129089355469, 77.84197998046875, -2.1669387817382812, 301.4434814453125, 124.8546142578125, 0.0, 262.4737548828125, 277.29852294921875, -16.483089447021484, -66.5487060546875, 139.26895141601562, 60.60685729980469, 82.273193359375, 164.64376831054688, 94.36077880859375, 151.46063232421875, 101.01311492919922, 126.3267822265625, -0.4221343994140625, 81.60971069335938, 64.40104675292969, 39.28533935546875, 97.19808959960938, 4.0268096923828125, 120.5753173828125, 76.10658264160156, -231.84933471679688, -41.10272216796875, -102.10298919677734, -35.764068603515625, 48.73881530761719, 144.323974609375, -11.324211120605469, -34.724273681640625, -36.621826171875, 139.92681884765625, 110.76470947265625, 66.95352172851562, 145.4105682373047, 72.93496704101562, 23.30169677734375, -173.6827392578125, 92.99476623535156, 169.9982147216797, 59.98974609375, 261.33441162109375, -17.227096557617188, 238.16885375976562, 11.935997009277344, 49.061798095703125, 3.737823486328125, -139.02578735351562, -6.806587219238281, -0.06457901000976562, 72.67216491699219, 84.71148681640625, -46.655662536621094, 112.27728271484375, 24.538726806640625, 70.07437133789062, 76.55889892578125, 103.77365112304688, 107.94461059570312, 81.34036254882812, 76.11906433105469, -146.496826171875, 230.9957275390625, 156.86050415039062, -93.36846160888672, -0.569976806640625, 131.48614501953125, -18.737564086914062, -55.78196716308594, 1.9695014953613281, 51.38885498046875, 0.0, 162.30551147460938, 179.9896240234375, -16.624496459960938, -2.1954193115234375, 185.52432250976562, 9.8533935546875, 76.88082885742188, 50.87640380859375, 79.18058776855469, -5.389055252075195], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000277.npy"}
{"epoch": 0.5801047120418849, "step": 278, "batch_size": 128, "mean": 45.49703598022461, "std": 109.50682830810547, "min": -173.58419799804688, "p10": -80.89072189331054, "median": 20.24762725830078, "p90": 187.84619140624994, "max": 470.579833984375, "pos_frac": 0.6328125, "sample": [296.67266845703125, 26.909284591674805, -152.61798095703125, 13.016048431396484, -16.658828735351562, -22.616119384765625, 139.57843017578125, 58.98313903808594, -56.29166030883789, -53.96382141113281, 89.17741394042969, -23.85333251953125, 152.17025756835938, -42.052276611328125, 9.123432159423828, 209.7225341796875, -19.602935791015625, 182.91250610351562, -131.18145751953125, 161.32391357421875, -102.15829467773438, 18.310989379882812, 51.0677490234375, 8.870208740234375, -139.81546020507812, -67.8792724609375, 2.379751205444336, 125.44708251953125, -51.9571533203125, 278.43572998046875, 3.204761505126953, -18.80011749267578, -15.30841064453125, 75.52334594726562, 94.58529663085938, -2.577301025390625, 30.133028030395508, -72.52813720703125, 244.79385375976562, -12.426231384277344, -49.98297882080078, 94.56024169921875, 29.38140869140625, -0.852325439453125, 1.0292778015136719, -53.356178283691406, -59.9091796875, -97.44784545898438, 123.69728088378906, 99.42657470703125, 218.63720703125, 237.32305908203125, -21.622711181640625, 24.852218627929688, 30.82159423828125, -82.2058334350586, 146.6724853515625, 43.071807861328125, -27.860870361328125, 110.24339294433594, 9.65296745300293, -123.045166015625, 5.157440185546875, -29.756179809570312, -22.342166900634766, 220.61392211914062, 79.58218383789062, 34.478797912597656, 160.62860107421875, 93.61334228515625, 130.10086059570312, 13.21832275390625, -24.635589599609375, 260.0072021484375, 267.2392578125, 36.2049446105957, -4.414846420288086, 306.47503662109375, -103.35894775390625, -102.29314422607422, 12.123109817504883, -47.68849182128906, 114.50042724609375, -161.8914031982422, -83.62063598632812, 29.54425811767578, 136.04019165039062, -21.593772888183594, 71.30255126953125, 38.990562438964844, -6.666168212890625, -56.03143310546875, 11.810447692871094, 23.0201416015625, 21.53173828125, 115.69955444335938, 141.30343627929688, 124.91462707519531, 148.69412231445312, 71.73114013671875, 23.192302703857422, 87.5626220703125, -45.04644775390625, 181.35504150390625, 12.964874267578125, -102.178466796875, -173.58419799804688, 470.579833984375, 18.963516235351562, 130.8145751953125, -13.217132568359375, 10.1070556640625, -2.8659839630126953, 114.56829071044922, 111.51400756835938, 115.47488403320312, -4.754241943359375, 101.91983795166016, 141.9639129638672, 14.291435241699219, 130.1980743408203, 252.901611328125, 8.638580322265625, 165.37228393554688, -80.32710266113281, 42.401519775390625, -15.920997619628906, 199.35812377929688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000278.npy"}
{"epoch": 0.5821989528795811, "step": 279, "batch_size": 128, "mean": 70.2845230102539, "std": 103.688720703125, "min": -169.70269775390625, "p10": -45.99748229980469, "median": 61.605224609375, "p90": 192.83299865722657, "max": 374.0830078125, "pos_frac": 0.7578125, "sample": [78.825927734375, 205.25732421875, 186.6827850341797, 44.57847595214844, 146.27215576171875, 99.72955322265625, 86.55622863769531, -16.061660766601562, 193.776123046875, 142.18423461914062, 71.84698486328125, 129.03118896484375, 35.960418701171875, 12.380508422851562, 185.4053955078125, 192.42880249023438, -39.600860595703125, -75.13591003417969, -46.548095703125, 98.87078857421875, 200.33319091796875, 18.365537643432617, 98.93426513671875, 133.17742919921875, -69.49484252929688, 66.85205078125, 169.58184814453125, 321.65771484375, 189.43984985351562, 188.283447265625, 0.27496337890625, 68.5125732421875, -5.1330108642578125, 57.694366455078125, 36.85400390625, 145.65365600585938, 15.134140014648438, 53.4494743347168, -160.19635009765625, 55.379364013671875, 21.654830932617188, -1.760650634765625, 46.11285400390625, 8.9920654296875, -85.92135620117188, 7.197177886962891, 145.3116455078125, -169.70269775390625, 6.912059783935547, 174.218017578125, -2.204376220703125, 338.1767578125, 127.40142822265625, 201.10577392578125, -33.111671447753906, 106.76882934570312, 185.01556396484375, 102.525634765625, 60.496978759765625, -40.227020263671875, -26.464569091796875, 123.85690307617188, -142.7344970703125, -3.0592041015625, 15.87667465209961, 139.17410278320312, 11.394622802734375, 182.07550048828125, 36.39639663696289, 29.549972534179688, 63.2235107421875, 249.8724365234375, 52.48451232910156, 16.86688232421875, -21.178009033203125, 17.098159790039062, 207.61663818359375, -149.0303955078125, 135.886962890625, 191.57345581054688, 37.8404541015625, 9.369171142578125, -0.49934959411621094, -152.29156494140625, 170.8651123046875, 140.660888671875, 131.22988891601562, 40.716522216796875, 133.56036376953125, 76.50064086914062, 62.30012512207031, -50.66520690917969, 327.05828857421875, 72.44061279296875, 161.40496826171875, -14.436492919921875, 101.79925537109375, -3.394012451171875, -0.1749267578125, 96.93777465820312, 29.540435791015625, 184.3812713623047, 17.063140869140625, 22.495956420898438, -162.27462768554688, -6.53863525390625, 148.95274353027344, 229.59622192382812, -58.941429138183594, 31.4879150390625, 60.91032409667969, 89.313720703125, -0.62451171875, 374.0830078125, 217.1607666015625, -35.0400390625, 18.305145263671875, 127.10072326660156, -45.761505126953125, 101.09844970703125, 68.89584350585938, 110.57574462890625, 5.155582427978516, -54.46038818359375, 145.09573364257812, 83.40408325195312, 62.505340576171875, 245.079833984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000279.npy"}
{"epoch": 0.5842931937172775, "step": 280, "batch_size": 128, "mean": 60.341529846191406, "std": 94.69654083251953, "min": -165.08743286132812, "p10": -45.25460433959961, "median": 46.5499210357666, "p90": 193.35624389648436, "max": 301.120849609375, "pos_frac": 0.75, "sample": [201.18136596679688, 112.15032196044922, -11.987869262695312, 46.66192626953125, 148.26144409179688, -74.18798828125, 48.809173583984375, 161.5972900390625, 148.80609130859375, 69.57635498046875, 156.3623046875, 3.1484832763671875, 124.86390686035156, -9.757293701171875, 116.36943817138672, 124.15338897705078, -21.706836700439453, 1.6247711181640625, -77.99942016601562, 174.1425323486328, -48.41291809082031, 37.074119567871094, 57.82470703125, 80.30352783203125, 95.58045959472656, 202.159423828125, 21.9368896484375, 49.3544921875, 0.4258384704589844, 144.9530029296875, -96.123291015625, 46.43791580200195, -18.139280319213867, -158.92987060546875, 156.2345428466797, 172.23175048828125, 27.503326416015625, 126.11759948730469, 17.842042922973633, -7.293365478515625, 63.792388916015625, 195.73968505859375, -154.23597717285156, 154.65318298339844, 13.19537353515625, 25.695873260498047, 63.955078125, 215.66192626953125, 29.0562744140625, 108.79324340820312, 277.80731201171875, -45.03319549560547, 112.61424255371094, 38.739959716796875, 275.7037353515625, -31.800765991210938, 234.453125, 16.6136474609375, 15.5272216796875, -81.45921325683594, 103.03387451171875, 188.81024169921875, 16.21807861328125, -142.2829132080078, 149.07113647460938, 62.8814697265625, -68.06494140625, 82.0545883178711, 32.873260498046875, 41.43663024902344, 23.469379425048828, -21.53228759765625, -102.12460327148438, 224.39866638183594, 10.306785583496094, 45.061279296875, 91.85906982421875, 8.864463806152344, -14.740997314453125, 189.4251708984375, 161.855224609375, 172.77609252929688, 195.23651123046875, -124.64764404296875, 93.4200439453125, -12.912986755371094, -0.155914306640625, 19.327285766601562, 42.27435302734375, -15.2059326171875, 137.6378173828125, 11.30377197265625, -3.52972412109375, 73.72114562988281, 111.68788146972656, -45.77122497558594, 6.742698669433594, 160.3177490234375, 1.92169189453125, -36.68334197998047, -27.414840698242188, 36.7781982421875, 43.398193359375, -20.959915161132812, 100.75949096679688, 231.91943359375, 0.0, 79.48287963867188, 204.67010498046875, 64.09426879882812, 54.26850891113281, -8.797126770019531, 145.55445861816406, 34.53558349609375, 20.96868896484375, 112.597900390625, 301.120849609375, 18.910911560058594, 129.0583038330078, 51.21943664550781, 3.745189666748047, -165.08743286132812, 75.3871841430664, 192.79119873046875, 63.80517578125, 51.54531478881836, 194.6746826171875, -14.2679443359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000280.npy"}
{"epoch": 0.5863874345549738, "step": 281, "batch_size": 128, "mean": 62.781036376953125, "std": 99.73049926757812, "min": -207.44430541992188, "p10": -41.28464965820312, "median": 48.71545219421387, "p90": 192.4565643310547, "max": 500.84033203125, "pos_frac": 0.71875, "sample": [53.75360107421875, 90.64507293701172, -29.78143310546875, 23.163787841796875, 39.745269775390625, 193.20785522460938, 16.549346923828125, 150.8904571533203, 49.66139602661133, 150.37721252441406, 28.536880493164062, -37.60111999511719, 15.9808349609375, 57.0706787109375, 157.899658203125, 58.874237060546875, 49.94200134277344, 62.44769287109375, 119.14714050292969, 2.660797119140625, -5.85107421875, 194.4713134765625, 257.583251953125, -26.4403076171875, 7.000465393066406, 117.65469360351562, 500.84033203125, 345.6435241699219, 159.0684814453125, 23.12115478515625, 141.04611206054688, -100.14739990234375, 120.37982177734375, 56.69677734375, 82.6761474609375, 33.329742431640625, 181.06173706054688, -81.68199157714844, 149.33200073242188, 96.83920288085938, 151.61505126953125, 6.74493408203125, 10.549362182617188, -32.87060546875, -16.579193115234375, 76.39656829833984, -3.58343505859375, 219.91226196289062, -46.689208984375, -15.14703369140625, -23.337677001953125, 117.56201171875, -59.439910888671875, 88.55810546875, -0.7219791412353516, 192.13458251953125, 266.0807189941406, 5.105438232421875, -42.1053466796875, 106.53964233398438, 0.8741455078125, 170.23287963867188, -16.350784301757812, 134.70150756835938, 56.034149169921875, 147.5128173828125, 196.79995727539062, -19.274383544921875, -207.44430541992188, 3.268890380859375, 140.0843505859375, -16.377723693847656, 126.20095825195312, 47.769508361816406, -69.77127075195312, 246.41592407226562, -36.1060791015625, 88.90507507324219, 16.263519287109375, 35.93762969970703, 11.01666259765625, 194.28428649902344, 179.51016235351562, 29.128936767578125, -40.93292236328125, -73.46009826660156, 36.018218994140625, -84.94580078125, 30.735855102539062, -39.418800354003906, 212.63406372070312, 30.0521240234375, 23.905776977539062, -53.241851806640625, -9.561210632324219, 181.889404296875, -67.43963623046875, 216.29214477539062, -61.517860412597656, 128.34500122070312, 0.0, -29.6431884765625, 0.129486083984375, 81.8416748046875, 36.119598388671875, 44.06642150878906, 0.0, 83.26248168945312, 136.18954467773438, 103.42298889160156, -39.29412841796875, 1.9743766784667969, 131.03726196289062, 92.6097412109375, 143.80072021484375, 98.46037292480469, 215.89816284179688, 158.6654052734375, 130.28567504882812, 73.6739501953125, 101.68426513671875, -10.870964050292969, 50.981422424316406, -35.8812255859375, 97.78390502929688, -113.150634765625, 78.0555419921875, 9.379119873046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000281.npy"}
{"epoch": 0.5884816753926702, "step": 282, "batch_size": 128, "mean": 63.227638244628906, "std": 99.10277557373047, "min": -203.8624267578125, "p10": -45.27248916625975, "median": 54.60906219482422, "p90": 188.53197937011717, "max": 345.09564208984375, "pos_frac": 0.75, "sample": [186.52059936523438, -39.67570495605469, 52.771453857421875, 315.32275390625, 156.994140625, 75.33251953125, 2.4071044921875, 27.266082763671875, 138.33120727539062, -86.28182983398438, 140.971923828125, 129.3054962158203, 97.1468505859375, 100.70367431640625, 33.3065185546875, 9.896240234375, 174.32440185546875, 115.03599548339844, 117.4118881225586, -58.136375427246094, 108.56597900390625, 56.44667053222656, 4.694515228271484, 9.78790283203125, -37.889862060546875, 18.148746490478516, 26.6488037109375, 191.03549194335938, 62.38836669921875, 35.729637145996094, -10.546470642089844, 158.107666015625, -36.63287353515625, 2.49334716796875, -177.38201904296875, 214.41815185546875, 145.29815673828125, 3.216337203979492, 67.67141723632812, 143.3253173828125, -19.339324951171875, 4.1419677734375, -10.27511978149414, 167.03839111328125, -23.7181396484375, 16.237762451171875, -55.6683349609375, 207.29461669921875, 40.80674743652344, 132.66458129882812, -203.8624267578125, -135.97036743164062, 198.03384399414062, 345.09564208984375, 177.23434448242188, -10.017311096191406, 3.29718017578125, 32.52024841308594, 4.0109710693359375, 19.80572509765625, 236.56809997558594, 122.05087280273438, 265.35528564453125, 63.290618896484375, 64.52545166015625, -41.674171447753906, -8.020179748535156, 123.3160400390625, 102.16490173339844, 11.468978881835938, 149.64263916015625, -9.20306396484375, 100.26654052734375, 233.41726684570312, 177.28807067871094, 122.777099609375, 68.4808349609375, 108.27090454101562, -53.66856384277344, -89.81695556640625, 151.49691772460938, 107.73858642578125, 49.155029296875, 233.76275634765625, 128.76434326171875, 49.84162902832031, -113.7274169921875, 45.57354736328125, 148.787353515625, 181.051025390625, 204.77859497070312, 115.13763427734375, 89.97352600097656, -16.972320556640625, 10.840614318847656, -29.237167358398438, 31.108123779296875, -18.5186767578125, 78.97283935546875, 20.32318878173828, 223.6717529296875, -68.14959716796875, -124.41421508789062, 83.34495544433594, -93.922119140625, 10.9212646484375, -28.357421875, 92.40225219726562, -2.2469482421875, 30.10931396484375, -28.728302001953125, 138.21697998046875, 147.99395751953125, 4.895881652832031, 187.45904541015625, -127.7562255859375, 229.44769287109375, 94.97920989990234, 68.1285629272461, 22.078079223632812, 35.582489013671875, 97.88446044921875, -27.48065185546875, 50.1876220703125, 155.870849609375, -19.045822143554688, 77.16738891601562, 83.73504638671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000282.npy"}
{"epoch": 0.5905759162303665, "step": 283, "batch_size": 128, "mean": 55.27618408203125, "std": 95.08182525634766, "min": -178.5877685546875, "p10": -45.276513671874994, "median": 34.070777893066406, "p90": 167.92459869384763, "max": 384.0787353515625, "pos_frac": 0.6953125, "sample": [-57.31761169433594, 66.86700439453125, 129.3347930908203, -37.970306396484375, 25.094924926757812, 158.88790893554688, 58.66649627685547, 178.74444580078125, -18.3262939453125, 135.69577026367188, -16.32513427734375, -39.4368896484375, 31.609786987304688, 177.16641235351562, 69.60504150390625, 157.71246337890625, 105.44134521484375, -44.6497802734375, 132.38119506835938, 23.259185791015625, 138.38555908203125, 54.31622314453125, -151.75486755371094, 86.71701049804688, -49.03180694580078, 21.480972290039062, 128.31570434570312, -27.58013916015625, -22.492835998535156, 0.5947399139404297, 0.0, 155.180908203125, 9.30903434753418, -78.79879760742188, 101.78501892089844, -28.360374450683594, 44.86039733886719, -2.330331802368164, 98.47232055664062, 36.98448181152344, 370.6048583984375, 253.93048095703125, 70.94378662109375, -63.97113037109375, 102.1252670288086, -8.888450622558594, 248.9508514404297, 2.112152099609375, 160.87753295898438, 47.31968688964844, -19.935882568359375, 8.394607543945312, 3.5887203216552734, 31.054290771484375, 384.0787353515625, 33.76470184326172, 130.251708984375, 6.48040771484375, 31.152908325195312, -27.166229248046875, 138.89866638183594, 319.51611328125, 30.17719268798828, 16.214130401611328, 241.8553466796875, -46.7388916015625, 11.08331298828125, 160.9122314453125, -34.93280029296875, 185.88031005859375, -67.79946899414062, 11.167312622070312, 222.08221435546875, 199.68817138671875, 163.9638214111328, -24.958160400390625, -4.629138946533203, 92.54290771484375, 43.94207763671875, 34.376853942871094, -53.79222869873047, 130.58355712890625, -24.424468994140625, -1.133331298828125, 95.83160400390625, -8.810539245605469, 72.25, -94.00320434570312, -69.5416259765625, 46.197357177734375, 1.9177169799804688, -178.5877685546875, -8.23052978515625, 107.49472045898438, 32.530548095703125, 88.25665283203125, 144.31680297851562, 16.228487014770508, -14.085735321044922, -36.19862365722656, 25.445884704589844, 137.5194091796875, 46.623748779296875, -49.738006591796875, 139.57241821289062, 83.04596710205078, 163.3001708984375, -29.780471801757812, 101.9552001953125, 231.0999755859375, 180.26788330078125, 118.74246215820312, 45.41273498535156, 16.876693725585938, 0.0, 21.70904541015625, 63.757476806640625, -50.883514404296875, 125.44125366210938, 36.85394287109375, 23.313217163085938, -40.8009033203125, 10.979583740234375, 78.38003540039062, -33.37689208984375, 60.34405517578125, 72.06298828125, 39.02264404296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000283.npy"}
{"epoch": 0.5926701570680628, "step": 284, "batch_size": 128, "mean": 60.98337936401367, "std": 109.90228271484375, "min": -166.68679809570312, "p10": -78.49993133544922, "median": 49.170310974121094, "p90": 214.81083984375, "max": 329.7212219238281, "pos_frac": 0.6875, "sample": [-81.36936950683594, 57.56048583984375, -85.49534606933594, -166.68679809570312, 122.570556640625, -103.176513671875, 42.24040222167969, 70.90997314453125, 107.74957275390625, 213.69635009765625, 231.056884765625, 21.56744956970215, 130.41183471679688, 226.50198364257812, 47.697723388671875, -40.821533203125, 120.23382568359375, 97.6370849609375, 255.0072021484375, 30.426513671875, 94.03128051757812, -115.34271240234375, -3.2215576171875, 106.01344299316406, 251.59375, 140.67062377929688, 119.73468780517578, 191.422119140625, 60.04765319824219, -23.07257080078125, -154.84910583496094, -51.19476318359375, 29.706756591796875, 35.25372314453125, 157.78594970703125, 241.5889892578125, 148.17376708984375, 136.70330810546875, 115.64013671875, -86.20463562011719, 132.3985595703125, 329.7212219238281, -26.632476806640625, 115.1815185546875, 191.39166259765625, 160.64309692382812, 233.6767578125, 100.42074584960938, 170.16615295410156, 15.88232421875, 169.30014038085938, 21.89611053466797, 217.41131591796875, 5.201019287109375, 132.89697265625, 111.68069458007812, -165.06149291992188, -166.32235717773438, 46.43467330932617, -71.1575927734375, 20.586746215820312, -41.176124572753906, -8.414527893066406, -33.504150390625, -67.1866455078125, 38.16241455078125, 23.928434371948242, 142.26254272460938, -71.9050521850586, 47.12066650390625, 22.080078125, 65.56500244140625, -16.526302337646484, 84.02301788330078, 10.6392822265625, -147.7496337890625, -55.13397216796875, -9.638904571533203, 228.1627197265625, 314.43670654296875, -2.8640060424804688, -7.335357666015625, 15.373710632324219, 263.4930114746094, -26.705963134765625, 87.8743896484375, -77.27017211914062, 35.12641906738281, 10.74261474609375, 12.137031555175781, 108.06318664550781, -1.11346435546875, 182.25360107421875, 101.89619445800781, 138.13961791992188, 210.17459106445312, 199.80593872070312, 8.384689331054688, 201.20555114746094, 123.45611572265625, -119.68276977539062, 65.82373046875, -6.6831512451171875, 283.98046875, 33.54935073852539, -46.117767333984375, -84.43084716796875, 15.901641845703125, 95.41729736328125, 160.22720336914062, 84.7413330078125, -25.818199157714844, -57.91546630859375, 19.40093994140625, 99.83474731445312, 50.64289855957031, -60.219482421875, 108.69416046142578, 218.601318359375, 185.86456298828125, 61.905792236328125, 134.75433349609375, -5.630977630615234, -13.104339599609375, 141.17355346679688, -30.192550659179688, -136.05776977539062, 85.31428527832031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000284.npy"}
{"epoch": 0.5947643979057592, "step": 285, "batch_size": 128, "mean": 45.13227462768555, "std": 98.98041534423828, "min": -158.06927490234375, "p10": -71.16801452636719, "median": 37.46434783935547, "p90": 185.7324035644531, "max": 308.720458984375, "pos_frac": 0.65625, "sample": [35.3226318359375, 69.2332763671875, 176.83908081054688, 23.04815673828125, 16.771621704101562, -63.99078369140625, 230.97267150878906, 226.0043182373047, 7.5178985595703125, 4.994140625, -87.53668212890625, 134.12518310546875, 84.50651550292969, 112.69187927246094, 217.85594177246094, 6.286396026611328, 72.50970458984375, -114.0841064453125, -21.4384765625, -2.3667354583740234, 19.2476806640625, 183.0281982421875, -22.365447998046875, 27.56085205078125, 51.88548278808594, 128.84576416015625, 25.716522216796875, 55.468780517578125, 118.0999755859375, -51.96531677246094, -24.696014404296875, -127.47927856445312, 188.82199096679688, 140.67239379882812, 121.806640625, -66.20149230957031, 120.1351318359375, -155.6279296875, -54.48625183105469, -10.727020263671875, -48.7830810546875, 55.4254150390625, -55.394447326660156, 133.34542846679688, -22.081878662109375, -27.976951599121094, 295.3293762207031, -1.7818450927734375, -44.64677429199219, 139.0997314453125, -59.121673583984375, 99.78375244140625, 137.6082000732422, -20.183509826660156, 113.12379455566406, -57.837310791015625, -74.60574340820312, 101.54583740234375, 70.15274047851562, 184.40829467773438, 25.884761810302734, 26.848373413085938, 221.53854370117188, 32.23529052734375, 44.2734375, 40.43084716796875, 83.1968994140625, -115.7128677368164, -55.661407470703125, -83.1373291015625, 40.58232116699219, 21.025543212890625, 25.68377685546875, 39.60606384277344, 245.51773071289062, 61.459075927734375, -36.24077606201172, -17.760723114013672, 179.75982666015625, 18.568405151367188, 152.0967254638672, -2.0159759521484375, 95.64502716064453, 46.08241271972656, -158.06927490234375, -96.09197998046875, -23.12762451171875, 1.9646949768066406, 232.65896606445312, 175.12796020507812, 80.15702819824219, 114.00582885742188, 29.12054443359375, 4.791656494140625, 49.320465087890625, 124.16317749023438, 43.18965148925781, -86.0528564453125, -44.73284912109375, 44.265533447265625, -5.505395889282227, 67.69805908203125, 81.959716796875, 48.86798095703125, 63.73881530761719, 308.720458984375, 241.3804931640625, -86.77658081054688, -57.08250427246094, 228.14044189453125, 124.28543090820312, 78.07597351074219, 40.10321044921875, 0.6649093627929688, 45.9117431640625, 32.9298095703125, 192.82305908203125, 54.6380615234375, -141.96099853515625, -89.87724304199219, -3.8632125854492188, -69.6947021484375, -53.361358642578125, -7.82611083984375, -61.97552490234375, 149.61212158203125, 91.4212646484375, 208.87405395507812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000285.npy"}
{"epoch": 0.5968586387434555, "step": 286, "batch_size": 128, "mean": 61.50841522216797, "std": 101.46376037597656, "min": -154.7259521484375, "p10": -49.92097244262694, "median": 49.89263916015625, "p90": 188.02323760986326, "max": 443.98626708984375, "pos_frac": 0.71875, "sample": [-12.6478271484375, 62.03643798828125, 58.95611572265625, 46.216888427734375, 210.97467041015625, 194.03109741210938, -24.431915283203125, 104.5330581665039, 56.17023468017578, 156.51431274414062, 147.34054565429688, 166.77667236328125, 45.756011962890625, -15.6544189453125, 104.48214721679688, -9.552780151367188, 12.092308044433594, 87.05421447753906, -21.26044464111328, -21.775146484375, 43.414573669433594, -1.5576286315917969, 91.64775085449219, 83.55976867675781, 100.76263427734375, 1.5449943542480469, 9.54343032836914, 232.78558349609375, 3.1572799682617188, -19.016271591186523, -32.182403564453125, 368.312255859375, 36.677635192871094, 81.5614242553711, 24.889190673828125, 55.10285186767578, 86.13150024414062, -9.35614013671875, 38.95154571533203, 138.28733825683594, -62.4840087890625, 94.20974731445312, 78.33975219726562, 44.194061279296875, 123.017333984375, 115.17654418945312, 61.13970947265625, 147.9014892578125, -62.29182434082031, 73.65644073486328, -108.37701416015625, 116.680419921875, 443.98626708984375, 44.70048522949219, 131.14501953125, 45.62009811401367, 228.62962341308594, 78.57571411132812, 42.18113708496094, -39.12989807128906, -103.99386596679688, -16.634429931640625, 53.09295654296875, 25.309898376464844, 24.4464111328125, 73.77658081054688, 138.3131561279297, 58.366455078125, -33.602264404296875, 49.346221923828125, 155.59552001953125, -128.5520782470703, 24.233802795410156, 50.439056396484375, 36.0814208984375, -20.48434066772461, -57.11424255371094, 229.59255981445312, -31.826400756835938, 208.21817016601562, 11.734130859375, 107.9857177734375, 56.0935173034668, 18.414825439453125, 13.7066650390625, -72.9642333984375, 162.26040649414062, 89.24577331542969, 158.20989990234375, -117.7855224609375, 79.0759048461914, 139.6123046875, 38.91267395019531, 199.57220458984375, -41.05809020996094, 6.8851165771484375, 38.60417938232422, 160.15545654296875, -83.85472106933594, 75.7796630859375, 155.3067626953125, -65.9736328125, 90.54617309570312, 413.84197998046875, 74.39566802978516, -46.83814239501953, -154.7259521484375, 35.8477783203125, -29.260711669921875, 0.0, -12.794189453125, -93.34135437011719, 223.71795654296875, 54.864479064941406, -14.060331344604492, 23.80682373046875, -12.557861328125, 39.00687026977539, -118.60763549804688, 257.84625244140625, 156.8023681640625, 185.4484405517578, 164.70169067382812, 155.12939453125, 222.43408203125, -21.428298950195312, 68.16680908203125, 66.91039276123047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000286.npy"}
{"epoch": 0.5989528795811518, "step": 287, "batch_size": 128, "mean": 58.588050842285156, "std": 103.35578918457031, "min": -200.82379150390625, "p10": -63.44422607421875, "median": 44.943214416503906, "p90": 180.54811706542966, "max": 292.57733154296875, "pos_frac": 0.7109375, "sample": [118.82186889648438, 7.496013641357422, -73.84432983398438, -8.843387603759766, 48.364036560058594, 175.66238403320312, -48.48089599609375, -43.83616638183594, -89.36270141601562, 4.206268310546875, 65.34721374511719, 44.1229248046875, 189.0374755859375, 173.83126831054688, 1.814666748046875, 221.98931884765625, 43.07537078857422, 53.67913818359375, 168.32162475585938, 235.85879516601562, 20.000995635986328, 42.597042083740234, -34.214630126953125, 97.62646484375, 172.89559936523438, -27.863433837890625, -17.54686737060547, 27.318862915039062, 175.0648193359375, 215.93817138671875, 217.00340270996094, -20.8822021484375, -48.944671630859375, 11.510025024414062, 186.84884643554688, 167.23846435546875, 15.014480590820312, 163.18600463867188, 11.027610778808594, 165.94540405273438, -27.7833251953125, 228.5108642578125, 288.74237060546875, -44.349517822265625, 154.29159545898438, 128.64187622070312, 82.0980224609375, 62.4150390625, 137.80755615234375, 141.92764282226562, -138.6083984375, -11.453559875488281, -37.7918701171875, 59.660186767578125, 34.09169006347656, 72.4501953125, -94.34477233886719, 15.824438095092773, -76.23908996582031, 146.79751586914062, 32.1661376953125, 117.79388427734375, 102.34426879882812, -2.5244903564453125, 183.16497802734375, 137.18276977539062, 100.34707641601562, 13.508575439453125, 135.27294921875, 95.05587768554688, -24.453338623046875, 139.17901611328125, 179.42660522460938, -187.70603942871094, -18.298980712890625, 124.5045166015625, 45.76350402832031, 38.006988525390625, 157.37094116210938, -65.2418212890625, 105.16390991210938, 160.182373046875, 61.8048095703125, 56.988433837890625, -200.82379150390625, -33.13029479980469, 161.4493408203125, -155.44207763671875, -111.40634155273438, 102.22496795654297, 258.5646057128906, 150.03091430664062, 2.1426639556884766, 26.191654205322266, -143.973876953125, -112.03448486328125, 20.440582275390625, 112.69676971435547, 105.43701171875, 19.4141845703125, 243.20809936523438, -8.951179504394531, 121.87181854248047, 99.70404052734375, 53.33892822265625, 93.24176025390625, 22.542404174804688, 292.57733154296875, 168.88458251953125, -6.4576263427734375, 136.0400390625, 10.120916366577148, 38.53973388671875, 150.04180908203125, -14.404052734375, 21.581501007080078, 230.74932861328125, 23.38202667236328, 175.99209594726562, -13.759391784667969, -186.96636962890625, -62.673828125, -0.989898681640625, -2.704925537109375, 6.841644287109375, 96.01477813720703, -44.56111145019531, 17.566970825195312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000287.npy"}
{"epoch": 0.6010471204188481, "step": 288, "batch_size": 128, "mean": 53.953365325927734, "std": 95.138916015625, "min": -192.68362426757812, "p10": -50.91184463500975, "median": 43.23520278930664, "p90": 192.6937713623047, "max": 319.3140869140625, "pos_frac": 0.71875, "sample": [99.4061279296875, -42.904117584228516, -42.802642822265625, 5.0987548828125, 43.65308380126953, 47.718505859375, 22.078720092773438, 0.0, 7.2957916259765625, 104.87442016601562, 42.81732177734375, 0.0, -150.56985473632812, 1.6764717102050781, 192.64273071289062, -46.041831970214844, 163.39666748046875, 15.422027587890625, 34.231971740722656, -192.68362426757812, 31.57628631591797, -86.08808898925781, 287.93658447265625, 19.65496826171875, -31.827835083007812, 227.82791137695312, 4.922569274902344, -30.657684326171875, 95.57919311523438, 40.46942138671875, 91.8826904296875, 46.4222412109375, 105.58343505859375, 19.061126708984375, 75.90461730957031, -153.01385498046875, -129.96600341796875, 3.406402587890625, 94.41339111328125, 59.254302978515625, -2.312255859375, -8.757080078125, -62.27520751953125, 36.26301574707031, 119.31854248046875, 55.3863525390625, 59.32121276855469, -104.60916137695312, 232.45025634765625, 32.873085021972656, 233.53173828125, 8.42181396484375, 30.841781616210938, 0.51593017578125, -8.673095703125, 257.84014892578125, -27.665435791015625, -20.10955047607422, 58.52593231201172, 296.2388916015625, 67.70693969726562, 42.26593017578125, 107.20806884765625, 180.28616333007812, 62.38667297363281, 126.39642333984375, 143.3907470703125, 209.8192138671875, -43.22535705566406, 184.08541870117188, -75.88992309570312, -83.74264526367188, 41.436607360839844, -9.48284912109375, 16.66680908203125, -84.940185546875, -2.1612281799316406, 143.81967163085938, 72.03965759277344, -42.929443359375, -10.93292236328125, 73.35939025878906, 52.820068359375, -3.682464599609375, 30.91905975341797, 53.197998046875, 59.228546142578125, -23.374908447265625, 184.72459411621094, 53.40730285644531, 112.41952514648438, -26.040695190429688, 92.75586700439453, 192.8128662109375, 196.6090850830078, 147.25906372070312, 9.43475341796875, 121.563720703125, 70.211181640625, -71.89616394042969, 82.27569580078125, 176.75865173339844, 196.86956787109375, 108.84544372558594, -79.243408203125, 94.0777587890625, 228.74838256835938, 117.96060180664062, 319.3140869140625, 51.411895751953125, 50.890960693359375, -24.17315673828125, 44.36871337890625, 28.251480102539062, -72.12869262695312, 36.00773620605469, 50.223480224609375, 5.95953369140625, -27.183563232421875, 120.3143539428711, 141.7503662109375, -1.855499267578125, 127.12814331054688, 40.96909713745117, 208.17076110839844, 79.58135986328125, 134.14923095703125, 31.876022338867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000288.npy"}
{"epoch": 0.6031413612565445, "step": 289, "batch_size": 128, "mean": 36.26837158203125, "std": 94.06484985351562, "min": -187.77029418945312, "p10": -79.86991271972656, "median": 33.919639587402344, "p90": 169.70992889404297, "max": 259.0, "pos_frac": 0.65625, "sample": [-107.9539794921875, -6.92901611328125, 14.54144287109375, 32.584022521972656, -74.23641967773438, 42.27586364746094, 7.0861358642578125, 0.0, -164.45437622070312, 47.60882568359375, -67.947509765625, -89.92375183105469, 71.92640686035156, 56.465240478515625, -97.20913696289062, 151.38397216796875, 151.51873779296875, 17.757904052734375, -16.863555908203125, -74.0172348022461, 118.9482421875, -0.023590087890625, 133.14389038085938, 78.28791809082031, 59.462432861328125, -43.91375732421875, -86.2474365234375, 197.02630615234375, 169.66770935058594, -4.24774169921875, 195.8402099609375, -87.3448486328125, -43.2796630859375, 11.342376708984375, 30.17236328125, 12.008575439453125, 27.275962829589844, 68.60458374023438, 76.83111572265625, 56.33746337890625, 145.48004150390625, 168.5997314453125, 119.8349609375, 35.25525665283203, -187.77029418945312, -72.3037109375, 53.82463073730469, 11.88656997680664, -68.77532958984375, 6.934623718261719, 238.25120544433594, -77.13668823242188, 162.47210693359375, 181.9981689453125, 57.02197265625, 127.04718780517578, -50.78361511230469, 36.15538024902344, -28.808853149414062, 185.4481964111328, -52.36419677734375, 81.46513366699219, 9.418846130371094, 97.79828643798828, 79.20477294921875, -62.2354736328125, 203.5699462890625, -54.10565185546875, 52.38019561767578, -49.886016845703125, -105.03065490722656, -119.41375732421875, 30.24053955078125, -59.65826416015625, -54.41656494140625, 17.305335998535156, -110.76062774658203, 16.333736419677734, -48.979454040527344, 169.80844116210938, 76.91273498535156, 148.38816833496094, 44.673683166503906, 56.377716064453125, -89.42349243164062, 91.55653381347656, 47.495635986328125, -86.40043640136719, -38.892242431640625, 146.29946899414062, -53.43193054199219, 199.7689971923828, 39.36151123046875, 54.73797607421875, 161.52899169921875, -34.70606994628906, 7.107568740844727, 44.4796142578125, 57.457733154296875, 35.4908447265625, 118.36309814453125, -67.60379028320312, 175.0238037109375, 30.58563232421875, 1.402740478515625, 15.455657958984375, -62.313079833984375, 120.43798828125, -61.87969970703125, 173.8950653076172, 18.19866180419922, 67.60342407226562, -4.627555847167969, 259.0, 49.693115234375, 82.26541137695312, -3.322509765625, 250.0972900390625, -159.59600830078125, 146.41888427734375, 192.8175506591797, -1.8614501953125, 87.16641235351562, 69.55233764648438, 38.80329895019531, 11.663017272949219, 101.5433349609375, 135.974365234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000289.npy"}
{"epoch": 0.6052356020942409, "step": 290, "batch_size": 128, "mean": 55.15168762207031, "std": 102.219970703125, "min": -179.82427978515625, "p10": -61.67144699096679, "median": 45.84444618225098, "p90": 184.77572631835935, "max": 383.311767578125, "pos_frac": 0.6875, "sample": [256.47607421875, 7.69610595703125, 29.516357421875, 235.2748260498047, 72.40634155273438, 85.7884521484375, 121.45220947265625, 318.9849853515625, 141.07949829101562, 54.045166015625, 231.85641479492188, 209.37942504882812, -13.519998550415039, 64.23568725585938, -101.93597412109375, 46.23249816894531, 130.70806884765625, 97.18316650390625, -15.642425537109375, 82.37939453125, -47.6954345703125, -13.701751708984375, 20.321807861328125, 168.1248016357422, -5.80096435546875, -14.432647705078125, -73.26765441894531, 177.06935119628906, 181.30386352539062, 87.1527099609375, -12.567474365234375, -6.04681396484375, 78.50335693359375, -47.239959716796875, 10.755447387695312, 129.995849609375, 113.40753173828125, 20.65560531616211, -17.33984375, -123.81625366210938, 131.00936889648438, 2.3036270141601562, 34.808685302734375, 60.574737548828125, 99.39889526367188, 11.446846008300781, 82.4654541015625, -42.28411865234375, 69.58897399902344, 240.75927734375, -77.34716796875, 36.881195068359375, 28.760723114013672, -99.5070571899414, 79.81430053710938, 158.88275146484375, 45.45639419555664, 140.75048828125, 131.72933959960938, -8.258026123046875, -8.354270935058594, 113.14501953125, -179.19387817382812, 150.3203582763672, -6.30828857421875, 59.166168212890625, 311.4007568359375, 164.7379150390625, 134.56338500976562, 130.4747772216797, 5.505401611328125, 122.73895263671875, 217.0274658203125, -59.83013916015625, -99.81858825683594, -179.82427978515625, -62.851356506347656, 79.45965576171875, 32.959136962890625, 36.46826171875, -23.940704345703125, 48.15753173828125, -7.511749267578125, 173.88803100585938, 241.7877197265625, 64.08493041992188, 211.14617919921875, 129.04693603515625, -174.64537048339844, -38.47563934326172, 125.34136199951172, -92.78239440917969, -64.29660034179688, 41.205047607421875, 52.697479248046875, -33.84465789794922, 94.17822265625, -39.69041442871094, 192.87673950195312, 71.6807861328125, 62.089019775390625, 6.671455383300781, 1.64703369140625, -61.165771484375, -5.1777191162109375, 8.882080078125, 383.311767578125, 25.039939880371094, 65.73934936523438, 24.54650115966797, -12.067447662353516, 162.77197265625, -42.716575622558594, -114.69580078125, 16.6717529296875, 169.66741943359375, 199.86273193359375, 39.300575256347656, -47.87261962890625, 127.43130493164062, 55.840087890625, 58.774627685546875, -35.1131591796875, 10.604705810546875, -1.1513214111328125, 17.48846435546875, 136.26617431640625, 67.8670654296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000290.npy"}
{"epoch": 0.6073298429319371, "step": 291, "batch_size": 128, "mean": 69.67835998535156, "std": 97.65288543701172, "min": -237.8033447265625, "p10": -43.589169311523435, "median": 52.816551208496094, "p90": 196.73654479980468, "max": 404.4756774902344, "pos_frac": 0.765625, "sample": [239.82305908203125, -11.302623748779297, 203.26162719726562, 24.439056396484375, -46.19671630859375, 24.00218963623047, -237.8033447265625, 130.0611572265625, 285.5041198730469, -10.516510009765625, 132.11013793945312, 172.38134765625, 47.62371826171875, -27.246658325195312, 121.526611328125, 108.68070983886719, 174.42405700683594, -58.77288818359375, 43.84759521484375, 53.66404724121094, 104.77593994140625, 116.760498046875, 171.93048095703125, 18.287078857421875, 115.36827087402344, 4.2607574462890625, 54.16485595703125, -36.63531494140625, 49.623390197753906, 55.6793098449707, 183.7555694580078, 12.8773193359375, 175.77639770507812, 61.706298828125, 158.13800048828125, 226.71910095214844, 96.75201416015625, 47.07696533203125, 42.6317138671875, 55.48106384277344, 63.13470458984375, 88.53256225585938, 69.29119873046875, 6.0283203125, 133.7760772705078, -32.270294189453125, 164.75518798828125, 241.61512756347656, -64.0914306640625, 68.15016174316406, -60.711944580078125, 193.0224609375, 70.28839111328125, 404.4756774902344, 73.1688232421875, 231.788818359375, 91.02999877929688, 213.15966796875, 104.30181121826172, -42.471649169921875, 22.5106201171875, 0.9854755401611328, 203.6998291015625, 108.41109466552734, 86.11198425292969, 25.7952880859375, 42.976593017578125, 28.584030151367188, 184.33587646484375, -22.898910522460938, -8.048049926757812, -40.66569519042969, -82.42253112792969, 73.16610717773438, 95.955078125, 11.960746765136719, -9.15411376953125, 50.382568359375, -40.853118896484375, 116.3798828125, 11.973054885864258, 199.65264892578125, 63.44367980957031, 147.28045654296875, 5.20843505859375, 138.568603515625, 20.753997802734375, 63.8900146484375, 0.0, -9.885490417480469, 46.5028076171875, 158.89320373535156, 42.6802978515625, 24.050140380859375, -12.9488525390625, -7.548852920532227, 19.80462646484375, -54.86883544921875, 181.21804809570312, 21.892379760742188, 204.14596557617188, 27.23138427734375, 9.997154235839844, 178.94180297851562, 35.896270751953125, -61.57159423828125, -80.51179504394531, 8.911262512207031, 175.48602294921875, 51.96905517578125, -1.81756591796875, -55.11163330078125, 32.64141082763672, 169.10507202148438, 164.97352600097656, -26.48358154296875, 330.9952392578125, -48.113525390625, -97.910888671875, 160.647705078125, 80.93496704101562, -88.18650817871094, 216.16098022460938, 36.82592010498047, 170.67640686035156, 106.89227294921875, 11.23046875, 195.48678588867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000291.npy"}
{"epoch": 0.6094240837696335, "step": 292, "batch_size": 128, "mean": 51.32325744628906, "std": 96.60364532470703, "min": -246.86834716796875, "p10": -66.99477081298828, "median": 48.134727478027344, "p90": 158.54608001708985, "max": 327.4873046875, "pos_frac": 0.75, "sample": [21.463348388671875, 67.781982421875, 32.54490661621094, -46.77978515625, 17.06284523010254, 96.84687805175781, 106.52871704101562, 34.633995056152344, -29.859601974487305, 18.82307243347168, 15.4608154296875, -1.78912353515625, 25.617942810058594, 119.70053100585938, -82.25311279296875, 230.802734375, 26.267364501953125, 17.554954528808594, 139.14938354492188, 93.93716430664062, -3.7960777282714844, -21.972702026367188, 22.752729415893555, 64.8214111328125, -92.3282470703125, -142.8216552734375, 96.22116088867188, 108.06704711914062, 96.39642333984375, -10.355560302734375, 175.372802734375, 138.73297119140625, 24.193239212036133, 57.728965759277344, 127.29946899414062, 240.17156982421875, 45.70123291015625, 18.36236572265625, -206.97415161132812, 53.065345764160156, 84.39028930664062, -4.556663513183594, 53.90838623046875, 84.56594848632812, -10.3348388671875, -7.83172607421875, -33.12847900390625, -69.35008239746094, 309.5670166015625, 142.39849853515625, 22.060516357421875, 85.4119873046875, 61.9248046875, 157.87205505371094, 28.54425048828125, 20.95939826965332, 160.11880493164062, 69.44566345214844, 51.13128662109375, 41.042755126953125, 196.65634155273438, -126.89306640625, 80.61233520507812, 11.739364624023438, 50.56822204589844, 110.71987915039062, 315.87677001953125, 88.90255737304688, 100.36273193359375, -70.47999572753906, 138.41958618164062, 62.14232635498047, 151.0059814453125, 100.5916748046875, 151.9659423828125, 148.90469360351562, 16.37615966796875, 21.830047607421875, 12.90234375, 22.873794555664062, -44.84063720703125, 38.32916259765625, 2.352375030517578, 327.4873046875, 126.49436950683594, 91.91424560546875, 171.33303833007812, -19.898361206054688, 33.579833984375, 145.97665405273438, -45.7376708984375, -5.8891143798828125, 221.63922119140625, -246.86834716796875, -115.35449981689453, 97.32485961914062, -45.8240966796875, -42.87388610839844, 88.97348022460938, 62.7972412109375, -85.63262939453125, 110.23902893066406, 5.753501892089844, -150.58200073242188, 33.578102111816406, 2.3548316955566406, 181.32159423828125, 76.6258316040039, 143.11569213867188, 62.1634521484375, 11.8521728515625, -65.9853515625, 21.059288024902344, 110.715087890625, 61.32312774658203, -78.20311737060547, 83.72334289550781, 34.26936340332031, 17.901168823242188, 267.4296875, 61.8745231628418, 52.6109619140625, -111.99894714355469, -58.204002380371094, 192.9563751220703, 154.1136474609375, 79.11886596679688, -12.382280349731445], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000292.npy"}
{"epoch": 0.6115183246073298, "step": 293, "batch_size": 128, "mean": 67.09434509277344, "std": 88.9114761352539, "min": -126.3131103515625, "p10": -38.23184509277344, "median": 55.56471252441406, "p90": 198.48749694824218, "max": 295.4132385253906, "pos_frac": 0.7421875, "sample": [174.1455535888672, 28.51752471923828, 232.3575439453125, 95.399658203125, -24.447647094726562, -31.5574951171875, 216.30511474609375, -46.046661376953125, -6.334136962890625, -1.812255859375, 9.635879516601562, 49.905303955078125, 295.4132385253906, 94.68378448486328, 40.234771728515625, 113.82464599609375, -89.19889831542969, -30.904022216796875, 10.172428131103516, -52.54478454589844, 97.60894012451172, -21.219688415527344, 39.4730224609375, -4.490835189819336, 33.24945068359375, 83.88949584960938, 97.54568481445312, 64.6681137084961, 2.184906005859375, 151.40118408203125, 262.997314453125, 137.2210693359375, 54.782867431640625, -105.23709106445312, 40.741668701171875, 71.72857666015625, 31.37664794921875, 124.44905090332031, 64.78985595703125, 30.94178009033203, 69.97736358642578, -65.19247436523438, 277.39007568359375, 115.4324951171875, 104.49665832519531, 92.48822021484375, -20.753997802734375, 235.985595703125, 82.22799682617188, -4.256683349609375, -72.67320251464844, 31.074264526367188, 195.14430236816406, 222.89141845703125, 224.38987731933594, 198.08895874023438, 129.20379638671875, 58.79229736328125, 31.362640380859375, 156.15850830078125, 27.186767578125, 59.772216796875, 110.95307159423828, -78.01261138916016, 42.673309326171875, 67.94093322753906, 221.07931518554688, 225.19039916992188, 61.382057189941406, -3.645343780517578, 87.2379150390625, 63.723968505859375, -24.979446411132812, 0.0, 220.00665283203125, 70.94758605957031, 33.7154541015625, -21.3326416015625, 3.7337570190429688, 56.3465576171875, 184.44898986816406, 160.9107666015625, -36.27012634277344, 47.46661376953125, 157.0740966796875, 199.41741943359375, 15.846420288085938, 152.1700439453125, 186.30389404296875, -63.009193420410156, -6.6402587890625, 182.3056640625, 54.690338134765625, -38.12150573730469, 16.07842254638672, 68.04249572753906, 111.51643371582031, -29.04052734375, 29.837692260742188, 54.07127380371094, 17.76654052734375, -126.3131103515625, 166.829833984375, 94.61156463623047, 123.9393310546875, 51.63148498535156, 86.78396606445312, 30.16241455078125, 2.4309921264648438, 236.7851104736328, 129.40231323242188, -53.20793533325195, 58.572265625, 166.70751953125, -21.2982177734375, 48.697998046875, -38.48930358886719, -0.7935066223144531, -42.075164794921875, -53.50543212890625, 57.5919189453125, 143.98919677734375, 119.22769165039062, 142.43418884277344, -1.0634651184082031, 33.262969970703125, 105.40505981445312, 39.49212646484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000293.npy"}
{"epoch": 0.6136125654450262, "step": 294, "batch_size": 128, "mean": 62.16522979736328, "std": 101.09305572509766, "min": -178.14984130859375, "p10": -48.00068130493164, "median": 53.211721420288086, "p90": 192.5655731201172, "max": 516.6282958984375, "pos_frac": 0.703125, "sample": [127.79205322265625, 78.80072021484375, 108.95901489257812, -19.4742431640625, 16.66168212890625, -71.275146484375, -11.091156005859375, 106.946044921875, 138.95005798339844, 74.07345581054688, 199.42730712890625, 53.356109619140625, -49.43543243408203, 84.37338256835938, 208.36215209960938, 11.855186462402344, -174.32952880859375, 53.06733322143555, 155.61459350585938, 97.73777770996094, 18.46514892578125, 14.156829833984375, 113.02166748046875, 18.790634155273438, -5.319660186767578, 72.57940673828125, -4.357355117797852, -47.38578796386719, 75.54781341552734, 165.31298828125, 123.63278198242188, 42.369537353515625, -3.15277099609375, -5.697959899902344, -0.2892646789550781, -9.427978515625, -111.80715942382812, -128.3297119140625, 178.55715942382812, -22.586105346679688, 68.79849243164062, 104.1586685180664, 22.438217163085938, -12.252899169921875, 245.0808868408203, 516.6282958984375, 54.190818786621094, 330.5992431640625, 16.749191284179688, -9.55670166015625, 15.10986328125, 291.8043518066406, 124.94404602050781, 107.9261245727539, 191.86309814453125, 113.02714538574219, -33.3848876953125, 183.29669189453125, -17.573211669921875, 64.96501922607422, -1.7440032958984375, 5.299598693847656, 126.14204406738281, -31.49969482421875, 61.888572692871094, 203.67697143554688, 72.947265625, 81.3367919921875, 33.070220947265625, 182.01663208007812, -14.327606201171875, 45.19477081298828, 11.098922729492188, 167.28173828125, -11.231369018554688, -50.33055114746094, 169.32574462890625, -49.58514404296875, 31.16314697265625, 5.478031158447266, 78.841796875, 0.0, 210.9845428466797, 220.5184326171875, 79.38995361328125, 150.31663513183594, -178.14984130859375, -50.9193115234375, 89.29632568359375, -56.36181640625, 42.90562438964844, 66.900146484375, 57.967498779296875, 4.1253662109375, 63.43428039550781, -9.942007064819336, 68.84466552734375, -11.968856811523438, 46.60205078125, 9.626338958740234, 215.7069091796875, 194.20468139648438, 102.69160461425781, 30.707168579101562, -28.0091552734375, 23.853904724121094, 273.1026611328125, -34.982666015625, 21.74285888671875, 64.34646606445312, 26.95196533203125, 198.08444213867188, -14.470916748046875, 56.193389892578125, 99.17819213867188, 87.2354736328125, 142.1864013671875, -2.2008438110351562, 3.6358413696289062, 184.35348510742188, 13.029205322265625, -68.74319458007812, -59.851295471191406, 77.13829040527344, 176.24200439453125, -119.02117919921875, 132.7706298828125, 188.19493103027344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000294.npy"}
{"epoch": 0.6157068062827226, "step": 295, "batch_size": 128, "mean": 53.47068786621094, "std": 89.13606262207031, "min": -190.16830444335938, "p10": -55.7804458618164, "median": 51.60154724121094, "p90": 169.38260803222653, "max": 290.2579345703125, "pos_frac": 0.7265625, "sample": [113.92584228515625, 98.7183837890625, -94.14190673828125, -19.967666625976562, 193.3017578125, 103.62257385253906, 117.13227844238281, 56.859588623046875, -152.59420776367188, 57.4984130859375, -63.117828369140625, 80.4894790649414, 176.31277465820312, 104.11288452148438, -80.01408386230469, -75.22183990478516, 190.49032592773438, -33.69843292236328, 103.07490539550781, 71.68115234375, -19.159027099609375, 201.02532958984375, 77.36827087402344, 156.4598846435547, 37.758087158203125, 93.64280700683594, -35.336631774902344, 24.967327117919922, 162.58770751953125, 99.0206069946289, 61.72611999511719, 250.74215698242188, -7.9693450927734375, 55.061248779296875, -90.64092254638672, 138.28024291992188, 47.033416748046875, 51.31671142578125, 90.79913330078125, 20.428863525390625, 43.80375671386719, 116.55233764648438, -71.097900390625, -190.16830444335938, 28.574981689453125, 284.50732421875, 91.92796325683594, 129.91998291015625, 18.527488708496094, 12.311599731445312, 9.249931335449219, 12.21099853515625, 81.12379455566406, 16.158584594726562, 8.08551025390625, -37.83013916015625, 93.03263854980469, 135.220458984375, 0.0, 98.91462707519531, 21.467815399169922, -13.019760131835938, 51.886383056640625, -84.7886962890625, -54.776153564453125, -22.5679931640625, 10.133796691894531, 59.75508117675781, 228.1563720703125, -71.72411346435547, 99.31475830078125, 115.261962890625, -31.493621826171875, 34.245826721191406, 32.2911376953125, -53.237823486328125, -94.92649841308594, 110.9499740600586, 87.44869995117188, 105.33541107177734, 3.523120880126953, 158.30133056640625, -41.71917724609375, 58.50453186035156, 140.99984741210938, -54.330238342285156, 76.37813568115234, 22.977218627929688, 23.921234130859375, 16.979736328125, 290.2579345703125, 208.8199462890625, -63.5633544921875, -12.4637451171875, 166.41253662109375, 108.84335327148438, 185.36798095703125, 28.46258544921875, -27.510467529296875, 60.59174346923828, 5.66937255859375, 65.97970581054688, 13.275062561035156, 212.13897705078125, 25.900848388671875, 73.06556701660156, 24.48681640625, 84.27995300292969, 66.38442993164062, -5.38238525390625, -31.715087890625, 28.52727508544922, 163.87213134765625, 69.01616668701172, 16.325302124023438, 130.979248046875, -21.977096557617188, -42.93031311035156, -58.12379455566406, 265.1321105957031, 144.7784423828125, -28.14886474609375, 105.5667724609375, -18.150375366210938, 123.90109252929688, 182.56863403320312, 29.125946044921875, 94.63372802734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000295.npy"}
{"epoch": 0.6178010471204188, "step": 296, "batch_size": 128, "mean": 76.83897399902344, "std": 105.0455322265625, "min": -246.49884033203125, "p10": -46.27947311401367, "median": 75.70234298706055, "p90": 201.45376892089843, "max": 369.10272216796875, "pos_frac": 0.796875, "sample": [24.732513427734375, 16.10611915588379, 60.44708251953125, 80.30250549316406, 235.7078857421875, -13.63916015625, -105.61257934570312, 170.0352783203125, 162.9279327392578, 106.46051025390625, 162.87168884277344, -41.33074951171875, 192.8568115234375, 52.953094482421875, 369.10272216796875, 98.09774780273438, 249.17047119140625, 82.58464050292969, 133.94329833984375, 211.56781005859375, 62.358642578125, 47.750579833984375, 95.85659790039062, 137.92706298828125, 99.3157958984375, 253.44287109375, 60.93220520019531, -31.770172119140625, 36.759315490722656, -43.04219055175781, -3.9978179931640625, 107.42367553710938, 172.165283203125, 5.467124938964844, -14.4462890625, 1.5803890228271484, 137.79901123046875, -18.370681762695312, 114.40586853027344, 124.47821807861328, 364.15533447265625, 122.47607421875, 54.87066650390625, 179.19078063964844, 37.512115478515625, 160.79156494140625, 304.14019775390625, 161.74691772460938, -149.43341064453125, 129.70574951171875, 197.40045166015625, -46.854530334472656, 77.98907470703125, 57.6241455078125, 153.79808044433594, 190.520263671875, 168.69082641601562, 70.32411193847656, 15.308761596679688, -246.49884033203125, 143.234375, 18.072647094726562, 23.031463623046875, 200.04351806640625, 45.140380859375, 13.502830505371094, 60.3341064453125, -92.23284912109375, -141.35983276367188, 25.49786376953125, 194.70184326171875, 147.93026733398438, 75.04440307617188, 148.05819702148438, 2.485271453857422, 76.36028289794922, 7.370384216308594, 245.18649291992188, 3.694366455078125, 202.31460571289062, -70.12435913085938, 107.76022338867188, 115.3292236328125, 73.36671447753906, 102.1961441040039, 29.93280029296875, 29.746002197265625, 8.75238037109375, 68.76766967773438, -25.223052978515625, 156.07077026367188, 133.0594024658203, 26.272117614746094, 83.3082275390625, 54.07122802734375, 209.44186401367188, 115.02999877929688, 99.168701171875, 7.21209716796875, 115.63055419921875, 16.086044311523438, -46.03302001953125, 109.6812744140625, -116.95155334472656, 210.66238403320312, -71.8936767578125, 17.922225952148438, -6.144622802734375, -117.7667236328125, 190.8943328857422, 20.665802001953125, -107.32952880859375, 240.12777709960938, 201.0848388671875, -27.88299560546875, 63.9488525390625, 31.65887451171875, 119.98358154296875, 128.70962524414062, 85.35794830322266, 168.36846923828125, -66.41372680664062, 172.1077880859375, 0.0, 115.0914306640625, 223.153076171875, -114.59967041015625, -42.05900573730469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000296.npy"}
{"epoch": 0.6198952879581152, "step": 297, "batch_size": 128, "mean": 53.64710235595703, "std": 90.99677276611328, "min": -172.73135375976562, "p10": -53.20350952148437, "median": 39.86940002441406, "p90": 180.36316528320313, "max": 330.1391906738281, "pos_frac": 0.7109375, "sample": [273.6077575683594, -4.2372283935546875, 167.2626953125, 126.68902587890625, 130.44224548339844, 1.9384841918945312, -54.348785400390625, -1.84405517578125, -30.953371047973633, 44.158294677734375, 195.6356658935547, 141.63897705078125, -69.25308227539062, 102.62628173828125, -19.6873779296875, -26.50274658203125, 68.4422607421875, 47.567626953125, 20.88104248046875, 74.95034790039062, 49.08506774902344, 179.82003784179688, 28.49188995361328, 146.08758544921875, -84.30661010742188, 72.79010009765625, 126.15316772460938, -8.43597412109375, -49.51853942871094, -36.51898193359375, 4.18499755859375, -73.1198959350586, 30.57818603515625, 116.33844757080078, 230.16036987304688, 147.06668090820312, 156.56951904296875, 66.44912719726562, 183.58267211914062, -122.91650390625, 17.03729248046875, 64.57572937011719, -54.815895080566406, 49.76507568359375, 41.7593994140625, 56.54302978515625, 10.31378173828125, 203.384765625, 214.31280517578125, 24.334945678710938, 141.82351684570312, 112.7060546875, 210.08663940429688, 105.5057373046875, 35.430145263671875, 87.22540283203125, 66.14124298095703, 34.15740966796875, 55.20782470703125, -3.4846038818359375, -37.504638671875, 60.22882080078125, 168.50230407714844, 32.677978515625, 186.89312744140625, -52.712677001953125, 22.167007446289062, -11.918426513671875, 81.98038482666016, 1.9316558837890625, 105.41416931152344, -19.957275390625, 36.1640625, 8.49200439453125, -55.110595703125, -172.73135375976562, 40.90910720825195, 40.160430908203125, -30.553619384765625, -153.8740997314453, 136.816650390625, 91.488525390625, 219.403564453125, -93.33258056640625, -29.298004150390625, 44.2152099609375, 34.406402587890625, 2.3619384765625, 163.62017822265625, 104.46054077148438, 121.25689697265625, 29.1884765625, 30.827407836914062, 35.20393371582031, 172.93777465820312, -8.895263671875, -34.683692932128906, 10.500808715820312, 170.12005615234375, -57.60826873779297, 28.36944580078125, -26.669219970703125, -62.635223388671875, 67.53073120117188, 16.4847412109375, -2.2067184448242188, -19.301986694335938, 129.96072387695312, 55.111595153808594, 67.76962280273438, -125.6778564453125, -0.9606056213378906, 31.173587799072266, 24.345550537109375, 39.578369140625, -24.70867347717285, 50.13823699951172, 181.63046264648438, 196.8267822265625, 247.09291076660156, -29.539268493652344, 140.27725219726562, 11.56640625, 150.52085876464844, -46.8194580078125, 95.94638061523438, 123.17073059082031, 330.1391906738281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000297.npy"}
{"epoch": 0.6219895287958115, "step": 298, "batch_size": 128, "mean": 64.74627685546875, "std": 107.17752838134766, "min": -237.47885131835938, "p10": -44.181761169433585, "median": 44.607513427734375, "p90": 205.03149108886717, "max": 332.350341796875, "pos_frac": 0.71875, "sample": [154.5375518798828, 167.73370361328125, -24.819061279296875, 5.76373291015625, 134.6916961669922, 42.472679138183594, 59.03253173828125, 37.59832763671875, 101.23529052734375, 154.9200439453125, 203.876708984375, 100.94457244873047, -124.99359130859375, -42.343841552734375, 76.68526458740234, 256.4283447265625, 4.135862350463867, 182.9158935546875, 173.93624877929688, 1.277587890625, 29.11724853515625, 1.414520263671875, -21.16764259338379, 24.37249755859375, -42.64814758300781, -8.725610733032227, 13.386754989624023, 142.4972686767578, 6.655941009521484, 52.09520721435547, 158.68832397460938, -12.5118408203125, -147.35845947265625, -112.97372436523438, -51.868682861328125, -76.67633056640625, 38.740753173828125, 183.91207885742188, 232.45057678222656, -7.087348937988281, 173.27996826171875, 164.65786743164062, 42.3389892578125, 300.12774658203125, 83.96875, 149.46481323242188, -71.92047119140625, -15.389236450195312, -5.3634033203125, 0.0, 18.14028549194336, 1.57965087890625, -39.91143798828125, 109.76437377929688, 73.23324584960938, 127.91445922851562, 166.33604431152344, 44.94921875, 224.85357666015625, 238.4135284423828, -36.004791259765625, 220.14358520507812, -122.508544921875, 287.30352783203125, -27.5809326171875, 210.03176879882812, 72.89846801757812, 7.299713134765625, 92.28952026367188, 0.717437744140625, 145.372314453125, -215.8172607421875, -36.396331787109375, -47.76019287109375, 189.41238403320312, -60.56263732910156, 117.88740539550781, 104.4422607421875, 68.439208984375, 173.78878784179688, 332.350341796875, 17.64520263671875, 51.017822265625, -32.826866149902344, 188.788818359375, 128.88250732421875, 7.712249755859375, 133.41921997070312, 206.28701782226562, 24.617218017578125, 264.7860107421875, 38.814422607421875, -0.8287010192871094, 63.068389892578125, 71.03952026367188, 16.711578369140625, 3.38641357421875, 44.26580810546875, 58.833404541015625, -29.14007568359375, 157.55819702148438, 200.6168212890625, 62.531402587890625, 136.2662353515625, 159.341552734375, 94.58914184570312, 188.20303344726562, -40.76580810546875, 204.493408203125, -237.47885131835938, 44.017364501953125, -24.7711181640625, 133.50047302246094, 26.062744140625, 9.572635650634766, -21.0474853515625, 237.43902587890625, 158.65963745117188, 263.4501647949219, 29.92578125, -38.85205078125, 53.87530517578125, -25.805198669433594, 78.8829345703125, -14.25982666015625, -68.67779541015625, 32.01227569580078, -98.82331848144531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000298.npy"}
{"epoch": 0.6240837696335079, "step": 299, "batch_size": 128, "mean": 57.012821197509766, "std": 86.52439880371094, "min": -200.94386291503906, "p10": -34.659725952148435, "median": 47.56398010253906, "p90": 163.2066177368164, "max": 318.3089904785156, "pos_frac": 0.7734375, "sample": [87.69303894042969, 305.12176513671875, 31.98998260498047, 79.25196075439453, 160.6575927734375, 56.646812438964844, -138.922607421875, 138.15960693359375, -31.5645751953125, 65.95794677734375, 63.075958251953125, -200.94386291503906, 12.727523803710938, -32.954795837402344, -108.4007568359375, 33.03721618652344, 36.1191291809082, 206.72274780273438, -5.284271240234375, -2.4482364654541016, 55.451210021972656, 38.03218078613281, 164.97235107421875, -45.10480499267578, 66.51364135742188, 108.16567993164062, 82.41608428955078, 47.2310791015625, 141.2769775390625, -88.4256591796875, 23.78851318359375, 98.65701293945312, 25.74072265625, 3.4749755859375, 103.28524780273438, -42.461181640625, 92.56060791015625, 50.582733154296875, -37.00897979736328, 81.726806640625, 221.1927490234375, -5.064453125, 178.92044067382812, -19.871063232421875, 97.9670639038086, 48.131622314453125, 26.12646484375, -26.779144287109375, 105.47479248046875, 116.93069458007812, 61.304534912109375, 20.195842742919922, -95.22662353515625, 16.284698486328125, 13.447494506835938, 3.9931488037109375, -20.6258544921875, 13.527740478515625, 231.3616943359375, 128.936279296875, 43.809173583984375, 47.896881103515625, 88.30130004882812, 211.20376586914062, 89.754150390625, -124.67767333984375, 237.265869140625, 162.4498748779297, 105.84976196289062, 318.3089904785156, 20.68224334716797, -3.632457733154297, 133.33395385742188, -12.86676025390625, -116.24824523925781, 68.436279296875, 142.251220703125, -35.974853515625, 144.60577392578125, 26.370384216308594, 86.01934814453125, 19.070770263671875, 133.44094848632812, 118.04276275634766, 91.2008056640625, 215.97531127929688, 63.179595947265625, 190.51548767089844, 44.74693298339844, -29.503082275390625, -34.096099853515625, 84.85528564453125, -47.75365447998047, 9.42608642578125, 106.37164306640625, 111.65115356445312, 198.41107177734375, 122.30206298828125, -47.22589111328125, 99.27349853515625, -20.709732055664062, 12.46197509765625, 133.53866577148438, -25.383338928222656, 26.51947784423828, 10.5152587890625, 130.34539794921875, -2.204437255859375, 95.26016235351562, 42.091461181640625, 2.738983154296875, 95.54861450195312, 26.40740966796875, 5.3447113037109375, 7.7437744140625, 126.10546875, 84.68838500976562, 6.75067138671875, 150.76199340820312, 10.271099090576172, 177.00462341308594, 4.63580322265625, 35.947017669677734, -3.4192352294921875, 52.3778076171875, 156.70413208007812, 19.6278076171875, 9.1976318359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000299.npy"}
{"epoch": 0.6261780104712041, "step": 300, "batch_size": 128, "mean": 54.59022521972656, "std": 96.97064208984375, "min": -178.63441467285156, "p10": -69.43489532470703, "median": 60.49217987060547, "p90": 185.25952606201167, "max": 316.58013916015625, "pos_frac": 0.71875, "sample": [164.5968017578125, 78.868408203125, 24.84613800048828, -135.09344482421875, 105.53297424316406, 31.8084716796875, 166.22796630859375, -29.757064819335938, -141.58273315429688, 63.3402099609375, 39.090667724609375, -22.99365234375, 131.55006408691406, 201.47674560546875, 31.656513214111328, -27.688583374023438, 35.101165771484375, -5.7418212890625, -7.25152587890625, 70.19680786132812, 16.416900634765625, 7.942138671875, 147.39315795898438, 89.041259765625, 198.66751098632812, 82.88484954833984, 77.669677734375, 99.2056884765625, -35.62889099121094, -33.177978515625, 90.503662109375, 131.94371032714844, 80.09516143798828, -82.620849609375, 163.846923828125, -10.708343505859375, -139.35128784179688, 69.99212646484375, 2.889810562133789, 202.76394653320312, 274.8584289550781, 82.90643310546875, 58.940032958984375, 17.259445190429688, -138.26101684570312, 181.81320190429688, 23.486557006835938, -0.14936447143554688, 41.25346374511719, 214.26809692382812, 90.2115478515625, 75.5423355102539, 98.8585205078125, 169.868408203125, 61.64924621582031, 12.508819580078125, -3.4665069580078125, 103.86956787109375, -34.43841552734375, 0.0, 101.56967163085938, 77.83992004394531, -32.467926025390625, -72.0374755859375, 78.0900650024414, 24.63232421875, 160.046630859375, 195.1008758544922, 92.4560546875, -28.556884765625, 3.56317138671875, 14.65673828125, 138.33311462402344, 114.080078125, 60.590911865234375, -10.321685791015625, 114.4195556640625, 33.916473388671875, 89.00921630859375, -25.381248474121094, 7.076318740844727, -6.6915435791015625, -14.441444396972656, -78.29974365234375, -108.7403564453125, 195.43328857421875, 130.99868774414062, -116.6656494140625, 80.080078125, 27.539962768554688, 114.69534301757812, 62.0689697265625, 201.9033203125, 11.557632446289062, 35.62809753417969, -34.132568359375, 78.50244140625, -37.1036376953125, 6.120573043823242, 316.58013916015625, 193.3009490966797, 160.60073852539062, -43.946441650390625, 60.39344787597656, 41.6514892578125, -63.643638610839844, 72.0084228515625, 91.40625, 113.18199157714844, 37.504974365234375, -125.42730712890625, 178.09671020507812, 247.89344787597656, 71.84710693359375, 72.455810546875, 145.0162353515625, -94.07147216796875, -178.63441467285156, 104.6361083984375, 4.012273788452148, -122.34432983398438, 144.21109008789062, -68.31950378417969, 234.5665283203125, 124.79022216796875, 300.01641845703125, 10.087005615234375, 9.67681884765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000300.npy"}
{"epoch": 0.6282722513089005, "step": 301, "batch_size": 128, "mean": 54.806304931640625, "std": 96.51044464111328, "min": -224.81427001953125, "p10": -60.07275085449218, "median": 43.75819396972656, "p90": 183.81747283935545, "max": 302.012451171875, "pos_frac": 0.71875, "sample": [13.61871337890625, 206.14743041992188, 99.70620727539062, 96.7938232421875, 0.0, 116.17767333984375, 83.22250366210938, 129.2729949951172, -93.45768737792969, -56.219207763671875, 206.57675170898438, 198.12432861328125, -68.49789428710938, 15.61285400390625, 130.62588500976562, -29.531982421875, -38.3330078125, -22.14211654663086, 50.91107177734375, -224.81427001953125, -42.291282653808594, 86.25384521484375, 151.48655700683594, 47.0540771484375, 22.10064697265625, -94.35059356689453, 289.5415344238281, 146.46929931640625, 102.77899169921875, 11.779998779296875, -0.445556640625, 144.45558166503906, 19.087059020996094, 222.33401489257812, 5.713855743408203, -95.812255859375, -43.033172607421875, 302.012451171875, -22.50849151611328, 168.22531127929688, 14.027168273925781, 43.503082275390625, 104.320068359375, 32.83441162109375, -11.686798095703125, 132.5665283203125, 172.9151611328125, 45.10490417480469, -121.44110107421875, 23.929176330566406, 15.091712951660156, 153.08409118652344, 179.3748779296875, 153.47979736328125, 215.88980102539062, 32.57740783691406, 129.13720703125, 26.029052734375, -104.41616821289062, -56.46197509765625, -102.1966552734375, 19.060880661010742, 10.951187133789062, 96.08050537109375, 35.50181579589844, 20.790592193603516, 4.172809600830078, -50.1444091796875, -7.323602676391602, 169.09661865234375, 208.9019317626953, 76.82186126708984, 72.18679809570312, 24.0430908203125, 2.621551513671875, 239.84033203125, 70.73153686523438, 195.40570068359375, -12.944305419921875, 257.92694091796875, -32.989898681640625, 11.877300262451172, 133.76699829101562, 44.0133056640625, 87.44480895996094, 156.58335876464844, 75.4901123046875, 23.474578857421875, 74.31423950195312, -37.79437255859375, 233.68751525878906, 182.0296630859375, 187.98902893066406, -87.13778686523438, -25.8236083984375, 10.054801940917969, 70.26266479492188, 28.85357666015625, 8.765380859375, -17.39118194580078, -108.8353271484375, 32.65032958984375, -93.70266723632812, 89.29571533203125, 142.71592712402344, 80.56259155273438, 68.03634643554688, 2.054229736328125, 62.771392822265625, 62.745269775390625, 15.311553955078125, 92.41537475585938, 168.58450317382812, -28.2357177734375, 146.75360107421875, -122.90338134765625, 166.57125854492188, -34.55290222167969, 94.32533264160156, 55.04792022705078, -0.073272705078125, -14.902847290039062, 124.898681640625, -90.03408813476562, 96.0682373046875, 57.41666030883789, 77.248046875, -0.525665283203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000301.npy"}
{"epoch": 0.6303664921465969, "step": 302, "batch_size": 128, "mean": 66.61660766601562, "std": 98.44170379638672, "min": -173.9178466796875, "p10": -36.253228759765626, "median": 56.41383743286133, "p90": 203.1357452392578, "max": 309.3896484375, "pos_frac": 0.7578125, "sample": [119.88818359375, 156.1956787109375, 177.529541015625, 170.776611328125, 151.3997802734375, 82.52078247070312, 60.00691223144531, -49.168365478515625, -102.4949951171875, 74.46783447265625, -107.21893310546875, -54.52705383300781, 209.78155517578125, 201.0577392578125, -35.044219970703125, 91.84934997558594, 107.52313232421875, -6.89605712890625, -120.27914428710938, 11.647087097167969, 22.91473388671875, 216.7752685546875, 199.63214111328125, -19.72521209716797, 27.612335205078125, 6.774261474609375, 213.7257843017578, -136.83547973632812, 67.48626708984375, -33.348236083984375, 147.39059448242188, -106.34742736816406, 46.81735610961914, 76.38935852050781, 0.0, 165.04803466796875, 277.74273681640625, 73.83944702148438, 159.3943328857422, 45.82171630859375, 195.24798583984375, 146.24261474609375, 219.13824462890625, 7.334716796875, 57.943565368652344, 17.704925537109375, 94.6513900756836, 4.88372802734375, -7.340827941894531, -1.6491813659667969, 152.37289428710938, 128.00311279296875, 17.2733154296875, -6.4871826171875, -31.886154174804688, 26.833770751953125, 309.3896484375, -15.994842529296875, -118.08859252929688, 10.861167907714844, 156.208984375, 3.7316741943359375, 187.00228881835938, 11.039215087890625, 5.87469482421875, 70.37347412109375, -173.9178466796875, 222.8538055419922, 15.627777099609375, 6.485626220703125, -36.67974853515625, 59.011962890625, 119.64642333984375, 42.84918212890625, 204.02392578125, -29.752098083496094, 42.247657775878906, 92.5360107421875, 150.1927490234375, -23.748489379882812, 14.01416015625, -18.353485107421875, 269.879638671875, 121.3828125, 193.37948608398438, -1.439727783203125, 50.4906005859375, 153.1925048828125, -7.737823486328125, 4.2570648193359375, 91.76979064941406, -36.0704345703125, 14.119857788085938, 193.6641845703125, 63.819915771484375, 103.76107788085938, 202.75509643554688, 212.92330932617188, -115.08078002929688, 17.608261108398438, 41.368751525878906, 59.85758972167969, 223.7156982421875, 106.72879028320312, -77.44891357421875, 140.3317108154297, 56.87303924560547, 217.1654052734375, 137.86376953125, 77.52479553222656, 108.4345703125, 1.673492431640625, -6.152259826660156, 112.29385375976562, 182.80352783203125, 64.12548828125, 47.126869201660156, 146.60157775878906, -33.195037841796875, 36.70463562011719, -101.57485961914062, 27.09356689453125, 0.4942054748535156, 239.4068603515625, 55.95463562011719, 24.965484619140625, 172.0904541015625, 13.598052978515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000302.npy"}
{"epoch": 0.6324607329842932, "step": 303, "batch_size": 128, "mean": 60.499977111816406, "std": 96.6457748413086, "min": -184.69387817382812, "p10": -50.81181259155272, "median": 40.831199645996094, "p90": 188.4215698242187, "max": 306.7333984375, "pos_frac": 0.7578125, "sample": [29.275909423828125, 19.049148559570312, -89.54885864257812, -153.5859375, -71.71723937988281, 131.56150817871094, -36.893341064453125, 59.0103759765625, -87.1717529296875, 12.490299224853516, -19.701570510864258, 144.9754638671875, 141.22048950195312, 99.87881469726562, 177.2825164794922, -17.001922607421875, -75.54104614257812, 65.09368896484375, 108.4136962890625, 29.107208251953125, 19.34619140625, 8.368301391601562, -30.01239013671875, 40.88804626464844, 69.35299682617188, -175.0718994140625, 163.5478057861328, -80.2420883178711, 44.80732345581055, 101.5345458984375, 17.847259521484375, 208.2366943359375, 153.84890747070312, 29.236427307128906, 141.659423828125, 238.94931030273438, 19.7569580078125, 82.03778076171875, 0.0, 28.88524627685547, 25.559722900390625, 17.0501708984375, 10.717964172363281, 17.25732421875, 127.3453369140625, -184.69387817382812, 202.78817749023438, 249.15748596191406, 147.8634033203125, 32.864288330078125, 159.12738037109375, -21.3756103515625, -78.03826904296875, -174.86502075195312, 59.9344482421875, 2.5910797119140625, 38.760963439941406, 24.689285278320312, 203.67373657226562, 37.68218994140625, 119.53544616699219, -16.91583251953125, 115.68179321289062, 7.9438934326171875, 80.91783142089844, 146.61329650878906, -41.98394775390625, 124.35004425048828, 12.2022705078125, -4.085968017578125, -59.50719451904297, 0.549072265625, 85.7447509765625, 40.77435302734375, 18.64057159423828, 153.47793579101562, 117.62091064453125, 2.9996795654296875, 87.61688232421875, 1.1947174072265625, -19.39752197265625, -37.18302917480469, 122.64116668701172, 158.77032470703125, -41.72130584716797, 124.75167846679688, 126.0374755859375, 181.3408660888672, -18.059356689453125, 117.0352783203125, 100.30899047851562, -95.150146484375, 306.7333984375, 6.62744140625, 228.32501220703125, 9.1785888671875, 183.65396118164062, 160.19801330566406, 100.3819580078125, -33.71185302734375, 6.698261260986328, 1.91351318359375, 124.4842529296875, 27.858367919921875, 210.77001953125, 94.58802032470703, 207.39414978027344, 163.0159912109375, 2.6245880126953125, -47.08522033691406, 20.190399169921875, -22.123077392578125, -66.82444763183594, 173.11053466796875, 199.54598999023438, 262.2379150390625, 202.08062744140625, 118.38127899169922, 54.791534423828125, 74.52227783203125, 173.5351104736328, -9.194625854492188, 130.48614501953125, -0.986663818359375, 95.84393310546875, 78.52297973632812, 131.4136962890625, 214.80520629882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000303.npy"}
{"epoch": 0.6345549738219896, "step": 304, "batch_size": 128, "mean": 65.55952453613281, "std": 100.4158706665039, "min": -169.2327117919922, "p10": -53.03963317871093, "median": 56.976600646972656, "p90": 193.19058532714837, "max": 391.0982666015625, "pos_frac": 0.734375, "sample": [0.0, 56.38380432128906, -33.284423828125, 63.78376007080078, 132.72445678710938, -0.7432708740234375, 236.98556518554688, 92.30419921875, 28.296539306640625, 123.22828674316406, 111.44654846191406, 15.81683349609375, -15.469970703125, 101.33056640625, 229.05450439453125, -49.667633056640625, 7.7137908935546875, 22.654319763183594, -169.2327117919922, 51.247802734375, -10.690338134765625, 139.75277709960938, 152.7576446533203, 124.962890625, 212.3325653076172, 127.12556457519531, 22.001792907714844, -65.34332275390625, -25.90985107421875, -50.58355712890625, 164.11361694335938, 53.290008544921875, -100.46267700195312, 8.625732421875, -87.4477310180664, 127.20880126953125, 107.85377502441406, -24.223312377929688, 231.80615234375, 102.13491821289062, 103.67007446289062, 19.972675323486328, 24.676422119140625, 1.3569488525390625, 72.41943359375, -68.39166259765625, 0.0, 162.77883911132812, 215.97372436523438, 216.825439453125, -59.26275634765625, 105.015869140625, 34.71673583984375, 60.6075439453125, 71.98074340820312, -44.42817687988281, 357.2091064453125, 57.56939697265625, 391.0982666015625, 0.737884521484375, -7.68133544921875, 12.201690673828125, 5.5430450439453125, 174.16522216796875, 95.37139892578125, -59.06268310546875, 97.91841125488281, 63.32391357421875, 19.61285400390625, 88.30689239501953, 221.62747192382812, -47.64520263671875, 58.04112243652344, 166.6543731689453, 102.54443359375, 101.19752502441406, -46.502864837646484, 26.030601501464844, 27.616668701171875, 76.09701538085938, 97.22857666015625, -21.5418701171875, -58.770477294921875, 308.8701477050781, 30.29906463623047, 127.12875366210938, 30.05682373046875, 229.84634399414062, 180.50494384765625, -124.775390625, 73.38113403320312, 5.81157112121582, 39.799156188964844, 150.40576171875, -95.9298095703125, 157.33908081054688, 117.33192443847656, 0.3609466552734375, -101.07965850830078, 160.45449829101562, 186.26382446289062, 69.05834197998047, -14.182052612304688, 27.235504150390625, 68.06532287597656, -63.03245544433594, -8.409698486328125, 128.07666015625, -72.64181518554688, 25.300399780273438, -18.737747192382812, 166.19207763671875, 177.40151977539062, -32.98638153076172, 157.47157287597656, -45.12603759765625, 85.07136535644531, 132.39846801757812, 139.0365753173828, 46.33880615234375, 19.7987060546875, 111.1846923828125, 312.9925231933594, 28.75136375427246, -32.695648193359375, 153.677001953125, 15.276153564453125, 209.35302734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000304.npy"}
{"epoch": 0.6366492146596858, "step": 305, "batch_size": 128, "mean": 89.92192840576172, "std": 103.89984130859375, "min": -206.9451904296875, "p10": -27.473646545410155, "median": 74.4639892578125, "p90": 225.2398483276367, "max": 376.1151123046875, "pos_frac": 0.84375, "sample": [59.367488861083984, 139.36451721191406, 20.516212463378906, 212.072265625, 145.38970947265625, 59.976715087890625, 218.03335571289062, 102.0955810546875, -47.65875244140625, 52.56072998046875, 122.35293579101562, 5.742633819580078, 115.06854248046875, 58.84370422363281, -105.36489868164062, -136.117919921875, 54.085357666015625, -0.6971206665039062, 163.12234497070312, 2.6117019653320312, 123.55934143066406, 227.77020263671875, 318.9942626953125, 140.9792938232422, 61.326934814453125, 206.4210205078125, 224.15541076660156, 32.485130310058594, 106.61902618408203, 25.60394287109375, 102.22810363769531, -28.930160522460938, 26.98822021484375, 3.95172119140625, 36.665306091308594, -19.767417907714844, 49.191619873046875, 5.708702087402344, 190.8563232421875, 153.15682983398438, 130.75033569335938, 268.2420959472656, 117.21217346191406, 95.36471557617188, 168.22119140625, 303.5114440917969, 108.7838134765625, 45.679046630859375, 171.27911376953125, 210.0118408203125, -8.841194152832031, 128.4542999267578, 118.40194702148438, 23.17486572265625, 150.4581756591797, 47.03106689453125, 14.815467834472656, 90.16908264160156, -39.6497802734375, 326.2197570800781, 132.17294311523438, -101.62232208251953, 103.55914306640625, 35.84930419921875, 92.34931945800781, 23.208099365234375, 52.120269775390625, 49.216522216796875, -68.94735717773438, 84.52667236328125, -59.645790100097656, 167.40115356445312, 20.995590209960938, 20.01873779296875, 14.30743408203125, -13.956771850585938, 376.1151123046875, 110.54598999023438, 231.29669189453125, 112.04571533203125, -102.15095520019531, 242.8350067138672, 143.18228149414062, -206.9451904296875, 162.27706909179688, 75.82928466796875, 73.09869384765625, 43.1551513671875, 11.580078125, 26.491546630859375, 174.7384033203125, 119.6099853515625, 273.5754699707031, 58.61383056640625, 299.63446044921875, 159.22549438476562, -26.84942626953125, 111.75274658203125, 10.124588012695312, -13.24371337890625, 354.1520690917969, 154.3093719482422, 322.03973388671875, 175.417724609375, 17.1807861328125, 57.8721923828125, 50.2635498046875, 172.05313110351562, 30.625076293945312, 72.50102233886719, 25.46337890625, 222.8229217529297, 41.171417236328125, 69.20298767089844, -34.72344970703125, 131.01983642578125, 87.56549072265625, 157.05328369140625, 112.82351684570312, -67.76351928710938, 58.63739013671875, 109.17987060546875, 125.82464599609375, 62.646461486816406, 265.14593505859375, -34.31205749511719, -0.70501708984375, 55.829002380371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000305.npy"}
{"epoch": 0.6387434554973822, "step": 306, "batch_size": 128, "mean": 57.32515335083008, "std": 90.34192657470703, "min": -130.41488647460938, "p10": -54.24306488037109, "median": 52.70374298095703, "p90": 186.09780273437497, "max": 271.8448791503906, "pos_frac": 0.7421875, "sample": [27.297027587890625, 36.681243896484375, 20.551483154296875, 218.99880981445312, 6.351776123046875, 89.7435302734375, 97.06741333007812, 133.6722412109375, -73.88653564453125, 114.21414184570312, -13.997764587402344, 38.48779296875, 79.03103637695312, -125.66339111328125, -80.40093994140625, 145.56890869140625, 159.32460021972656, 14.510557174682617, 119.53547668457031, 53.84964370727539, 140.13525390625, -21.32684326171875, 132.83773803710938, 16.3995304107666, 72.40850830078125, 51.930938720703125, 91.25152587890625, 89.27481079101562, 152.47381591796875, 138.88882446289062, 64.20574951171875, 245.8360595703125, -100.22772216796875, 271.8448791503906, -26.49233627319336, -8.315963745117188, 88.26190185546875, 206.8062744140625, 93.42343139648438, 6.22698974609375, 87.12771606445312, 66.64308166503906, -84.2138671875, 22.64361572265625, -42.05950927734375, 195.41424560546875, 15.87060546875, -130.41488647460938, 82.93913269042969, -52.125885009765625, 39.813255310058594, -73.1458740234375, 243.86134338378906, 101.51275634765625, 198.33316040039062, 135.896728515625, 148.13037109375, -59.18315124511719, 25.3814697265625, 111.91098022460938, -101.05961608886719, -16.480438232421875, 10.2802734375, -6.43316650390625, -44.784423828125, 209.63916015625, -34.30889892578125, 24.625946044921875, -65.54147338867188, 28.224781036376953, -2.644287109375, 53.03578186035156, -116.30831909179688, 203.71023559570312, 128.51544189453125, 138.4691162109375, 6.354652404785156, 152.4761962890625, 8.749794006347656, 162.61322021484375, 96.10284423828125, 63.10400390625, 238.74560546875, 37.636070251464844, 37.434226989746094, -0.7288360595703125, 104.49186706542969, 6.613677978515625, 52.3717041015625, 224.93719482421875, 111.35955810546875, 163.777099609375, 10.595611572265625, 168.87619018554688, -20.77838134765625, -45.967689514160156, 182.10504150390625, 97.74615478515625, -41.7239990234375, -62.26332092285156, 75.9031982421875, 14.808090209960938, 46.19050598144531, 70.80911254882812, 81.46823120117188, 2.6910552978515625, 76.02032470703125, 28.2030029296875, -26.797210693359375, 7.52142333984375, 181.50274658203125, 22.83000946044922, 55.30792236328125, 59.55392074584961, 6.2943115234375, -19.407379150390625, 80.87294006347656, -120.91813659667969, -39.68699645996094, 4.1956787109375, 78.62124633789062, -35.84161376953125, -17.962371826171875, 92.85997009277344, 55.296104431152344, 200.80120849609375, 240.99945068359375, 150.79879760742188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000306.npy"}
{"epoch": 0.6408376963350786, "step": 307, "batch_size": 128, "mean": 62.57981872558594, "std": 91.67069244384766, "min": -175.85247802734375, "p10": -55.93680419921873, "median": 66.23980712890625, "p90": 181.26166687011718, "max": 289.1475830078125, "pos_frac": 0.7578125, "sample": [156.54852294921875, 206.8677978515625, 22.292083740234375, 186.903076171875, 148.96707153320312, 101.22916412353516, 38.584259033203125, -15.123733520507812, 263.3179016113281, 192.99288940429688, 97.28097534179688, 70.05978393554688, 24.505859375, 128.53219604492188, -4.80291748046875, 47.72871780395508, 13.102947235107422, -116.51099395751953, 181.765625, 213.51763916015625, 147.818359375, -83.65312957763672, -91.3493423461914, 147.16583251953125, 136.27561950683594, -120.54629516601562, 42.73183059692383, 149.12411499023438, 289.1475830078125, 58.7479248046875, 56.50749206542969, 132.1258087158203, 0.38873291015625, -6.75438117980957, 23.931629180908203, 137.14010620117188, 130.08285522460938, 94.69964599609375, 42.807525634765625, 85.93830108642578, 188.33462524414062, -120.21640014648438, 67.46890258789062, 4.366912841796875, 183.86508178710938, 40.407615661621094, 14.160018920898438, 149.82089233398438, 72.49129486083984, 11.356529235839844, -39.767822265625, 83.80390167236328, -147.99761962890625, 252.8365478515625, 78.9659423828125, 92.51779174804688, 162.83856201171875, 137.61517333984375, 29.947616577148438, 181.04568481445312, 80.66578674316406, -8.998870849609375, 130.28732299804688, 145.78628540039062, 86.15884399414062, 160.64549255371094, -175.85247802734375, -33.3116455078125, -41.183929443359375, 18.54731559753418, 48.11182403564453, -21.22808837890625, 166.87530517578125, -18.17449951171875, 39.6685791015625, 50.536529541015625, 106.3824462890625, 189.63916015625, 39.38542175292969, -0.7844467163085938, -15.4361572265625, -49.933837890625, -34.40301513671875, -19.819976806640625, -133.390625, 11.2115478515625, -7.581695556640625, 87.02775573730469, 45.47956848144531, 95.28594970703125, 132.3738555908203, -79.77243041992188, 16.595306396484375, -1.6952285766601562, 146.61105346679688, 82.68791198730469, 142.91009521484375, -87.27174377441406, -25.2093505859375, 42.44738006591797, 78.10943603515625, -75.14276885986328, 86.14146423339844, -22.349811553955078, 25.63043212890625, 90.04092407226562, 101.43275451660156, 35.69610595703125, 31.974853515625, 125.26628875732422, -104.6561279296875, 101.18710327148438, 65.01071166992188, 143.76393127441406, 82.888427734375, 21.95758056640625, 157.58575439453125, 52.574745178222656, -69.9437255859375, 72.8021240234375, 112.12947082519531, 231.95654296875, 107.70120239257812, 17.3558349609375, 132.862548828125, 2.7559738159179688, 221.5633544921875, 98.69859313964844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000307.npy"}
{"epoch": 0.6429319371727749, "step": 308, "batch_size": 128, "mean": 45.63470458984375, "std": 98.2874526977539, "min": -275.6773986816406, "p10": -92.64179229736328, "median": 39.60767364501953, "p90": 152.3418991088867, "max": 440.8927001953125, "pos_frac": 0.703125, "sample": [152.33265686035156, 45.68730926513672, -40.168212890625, 94.49239349365234, 3.81103515625, 62.244415283203125, -83.23184204101562, -27.829498291015625, 111.42677307128906, 37.46937561035156, 159.42764282226562, 99.65435791015625, -96.39435577392578, 160.923583984375, 14.891044616699219, 23.04302978515625, 261.3917541503906, 26.47747802734375, -0.2717456817626953, -118.37142944335938, 164.70751953125, 229.83377075195312, 59.549285888671875, 136.73245239257812, 85.20394897460938, 108.651123046875, 30.261863708496094, -95.10086059570312, -58.0390625, 27.91307830810547, 11.61737060546875, 38.9158935546875, 88.56573486328125, -103.3310546875, -20.2188720703125, 118.42227172851562, 110.71728515625, -1.1763687133789062, 121.753662109375, 20.054046630859375, 122.42684936523438, 102.05210876464844, 91.17605590820312, 0.07315826416015625, 20.926498413085938, 174.5314483642578, -71.57052612304688, 82.35040283203125, 127.52516174316406, 32.411712646484375, 104.82199096679688, 109.37742614746094, 81.50033569335938, 152.36346435546875, 248.18228149414062, -91.58790588378906, 98.51199340820312, -95.17294311523438, 133.47607421875, 113.64227294921875, -41.09016418457031, 90.89784240722656, -43.275177001953125, 14.151214599609375, 23.353302001953125, 40.14239501953125, -101.5987548828125, 130.03399658203125, -4.518035888671875, 32.44963836669922, 249.477783203125, -96.6607666015625, 13.32025146484375, 144.64382934570312, -275.6773986816406, -21.50055694580078, 138.35052490234375, -10.976448059082031, -131.74642944335938, 107.71005249023438, 133.52369689941406, 73.01119995117188, 106.19686889648438, 82.03982543945312, 32.99658966064453, 39.79417419433594, 58.11431884765625, -2.194812774658203, -81.54360961914062, 55.49550247192383, 20.655364990234375, 51.89578628540039, -19.2559814453125, 71.4193115234375, 149.8927001953125, 30.24346923828125, -95.82151794433594, -153.89309692382812, -24.73870849609375, 48.90081787109375, -54.068939208984375, 29.183273315429688, 3.9298095703125, 143.2198486328125, 28.56817626953125, 212.03506469726562, 74.138671875, -15.7257080078125, -18.871253967285156, 5.9938812255859375, -16.0081787109375, -101.96429443359375, 206.7001953125, 137.43896484375, 440.8927001953125, -8.740653991699219, 140.8052215576172, 51.519927978515625, -172.50836181640625, 48.4281005859375, -18.783203125, 33.39398193359375, 58.107215881347656, 47.792320251464844, 88.7541732788086, -31.376953125, 155.69113159179688, 39.421173095703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000308.npy"}
{"epoch": 0.6450261780104712, "step": 309, "batch_size": 128, "mean": 72.0164794921875, "std": 100.5754165649414, "min": -172.80465698242188, "p10": -39.685447692871094, "median": 57.885581970214844, "p90": 202.76349334716795, "max": 347.08929443359375, "pos_frac": 0.7734375, "sample": [11.425674438476562, 115.86674499511719, 234.03781127929688, 101.93705749511719, 62.084075927734375, 3.330535888671875, 8.005126953125, -2.7337799072265625, 16.722900390625, -4.437183380126953, -10.619361877441406, 136.47232055664062, -53.4500732421875, 5.403106689453125, 187.77281188964844, 0.030340194702148438, -32.356781005859375, 309.63970947265625, -2.0134429931640625, 55.4049072265625, 74.80995178222656, 14.560157775878906, 94.67434692382812, 5.216682434082031, 95.48764038085938, 161.64700317382812, 24.121688842773438, 84.14439392089844, 31.657882690429688, 197.35586547851562, 26.17657470703125, 123.733642578125, -13.773460388183594, -3.867828369140625, 101.56463623046875, -27.399639129638672, 95.23095703125, -12.048812866210938, 279.5908203125, -155.18817138671875, 14.782257080078125, 132.552978515625, 2.404449462890625, -136.67108154296875, 60.045654296875, -107.8934326171875, 109.155517578125, 104.41152954101562, 190.90464782714844, 57.5789794921875, -38.94361877441406, 141.77896118164062, -41.4163818359375, 93.17167663574219, 98.47433471679688, 181.58340454101562, 122.06936645507812, 119.22738647460938, 25.63235092163086, 51.04908752441406, 95.218505859375, 336.4075622558594, -12.40582275390625, 192.7194366455078, 126.29214477539062, 70.88031005859375, 152.65676879882812, 2.275604248046875, 199.62350463867188, 104.32327270507812, 21.310321807861328, 29.094024658203125, 42.13507080078125, 15.481170654296875, -80.14900970458984, 227.21685791015625, -51.405029296875, 36.25726318359375, 5.94354248046875, 45.36279296875, -32.88716125488281, 347.08929443359375, 38.07209777832031, 112.62080383300781, 162.70745849609375, -45.55645751953125, 213.22998046875, 69.8551025390625, 261.1890869140625, 42.48461151123047, 118.4237060546875, 58.19218444824219, 241.31903076171875, 102.1259536743164, 166.83062744140625, -12.25213623046875, -112.27006530761719, 44.906280517578125, -72.4860610961914, 49.19861602783203, 210.0901336669922, -46.52507781982422, -8.745071411132812, 16.00531005859375, 56.44337463378906, 95.23602294921875, -11.675506591796875, 184.6204833984375, 221.5732421875, -23.457061767578125, 115.45488739013672, 253.3575439453125, 80.79763793945312, 153.37168884277344, 33.757503509521484, 247.43670654296875, 30.887908935546875, -172.80465698242188, 154.40768432617188, 197.84613037109375, 22.587814331054688, -122.08421325683594, 89.19308471679688, 153.30604553222656, 160.52059936523438, 50.49342346191406, 64.07266235351562, 179.79562377929688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000309.npy"}
{"epoch": 0.6471204188481675, "step": 310, "batch_size": 128, "mean": 67.19332885742188, "std": 97.84071350097656, "min": -155.53485107421875, "p10": -48.26713981628417, "median": 55.97808837890625, "p90": 189.02532348632812, "max": 381.03173828125, "pos_frac": 0.765625, "sample": [262.0863037109375, 91.30694580078125, 38.79261779785156, 293.54254150390625, -53.4117431640625, 190.328125, -31.977691650390625, -34.768524169921875, 32.8878173828125, 25.14394187927246, 138.998779296875, -124.0501708984375, 56.018096923828125, 95.07575988769531, 0.9622802734375, 55.938079833984375, -112.03341674804688, -35.78456115722656, -31.897865295410156, 143.01620483398438, 69.63995361328125, 148.16957092285156, -33.97088623046875, 64.20783996582031, -155.53485107421875, 88.83062744140625, 142.17510986328125, 113.3564453125, 18.831939697265625, 97.90310668945312, 133.44131469726562, 11.443603515625, -52.77996826171875, 107.24176025390625, 16.0699462890625, 117.67889404296875, 101.91651916503906, 172.39801025390625, -0.80413818359375, -93.52392578125, 47.37376403808594, 79.38162231445312, 10.582351684570312, 90.98165893554688, 126.68850708007812, 49.233673095703125, 45.90824890136719, 6.803680419921875, 116.87026977539062, 4.4470672607421875, -37.938194274902344, -51.736663818359375, 157.52786254882812, 201.23143005371094, -32.113800048828125, -122.59596252441406, -25.740127563476562, 67.00689697265625, 106.98455810546875, 7.682952880859375, 180.77691650390625, 71.7637710571289, 23.38530731201172, -42.264404296875, 147.40196228027344, 47.603271484375, 154.5792694091797, 335.8084716796875, -25.282997131347656, 15.777557373046875, 49.740081787109375, 86.48489379882812, 102.49359130859375, 37.55702209472656, 80.17758178710938, -77.217529296875, 381.03173828125, 148.09280395507812, -46.78020095825195, -109.48818969726562, -21.568801879882812, 113.926025390625, 134.92274475097656, 32.38189697265625, 172.215087890625, 61.732666015625, -27.050430297851562, -103.45846557617188, 42.16722869873047, 53.154327392578125, 10.518951416015625, 28.8138427734375, 119.04681396484375, -2.0946311950683594, 259.3487854003906, 212.80276489257812, 135.11569213867188, 143.5659637451172, 78.14935302734375, 213.3846435546875, -111.79242706298828, 17.824920654296875, 160.03317260742188, 144.7755584716797, 108.46707916259766, -18.933013916015625, 140.77772521972656, 19.725948333740234, 73.47685241699219, 188.46697998046875, 107.38754272460938, 27.79437255859375, 136.46701049804688, 236.82073974609375, 226.55508422851562, 39.37599182128906, -57.24388122558594, 50.36334228515625, 52.396705627441406, 55.864501953125, 145.1850128173828, 45.13946533203125, -20.786163330078125, 2.2494659423828125, 128.20013427734375, 136.87606811523438, 193.67782592773438, 237.4404296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000310.npy"}
{"epoch": 0.6492146596858639, "step": 311, "batch_size": 128, "mean": 68.92770385742188, "std": 101.7256088256836, "min": -243.9716796875, "p10": -56.75208435058593, "median": 72.98040008544922, "p90": 199.64722900390626, "max": 373.5345458984375, "pos_frac": 0.7421875, "sample": [75.65850830078125, -19.204498291015625, 171.92575073242188, 89.27085876464844, -221.951416015625, 92.82002258300781, 104.319091796875, 212.032470703125, 100.0054931640625, 56.998191833496094, -11.26474380493164, 57.870819091796875, -93.5443115234375, 109.22866821289062, 140.0977783203125, 69.55615234375, 199.56365966796875, -48.384490966796875, 82.3995361328125, -19.097198486328125, 373.5345458984375, -14.533111572265625, -72.49839782714844, 160.5492401123047, 263.41180419921875, 32.994873046875, -59.195098876953125, 4.276050567626953, 2.95489501953125, 89.0465087890625, -46.633087158203125, -24.409378051757812, 188.53921508789062, 4.290536880493164, 191.8564453125, 28.607833862304688, -85.77764892578125, 5.36474609375, 104.14938354492188, 84.0928955078125, 213.06851196289062, 130.80795288085938, 51.025299072265625, 70.30229187011719, 29.29345703125, 118.31666564941406, 139.47903442382812, 50.617828369140625, 88.4795150756836, 58.752662658691406, 99.26766967773438, -85.03974151611328, 22.22159194946289, 129.0853271484375, -63.070987701416016, -16.7984619140625, -13.026351928710938, -22.40325927734375, 202.2951202392578, 218.4508056640625, 159.32785034179688, 173.15599060058594, 119.31182861328125, 24.892189025878906, 199.84222412109375, 134.11383056640625, 142.25631713867188, 97.57026672363281, -102.64395141601562, 176.1713104248047, 63.06743621826172, 209.57550048828125, 5.584747314453125, -39.76214599609375, 146.55609130859375, -6.8341522216796875, 45.50975799560547, 42.7484130859375, -29.3289794921875, 177.63037109375, 120.598388671875, 55.09996032714844, 67.32061767578125, 49.968971252441406, 92.82623291015625, 3.3269386291503906, 165.22032165527344, 223.80743408203125, 242.734375, 122.04010009765625, 119.42904663085938, 83.11557006835938, -118.37619018554688, 63.83811950683594, 80.60386657714844, 218.49191284179688, -27.98366355895996, 5.900871276855469, -55.705078125, -133.71533203125, -4.113071441650391, 138.7927703857422, -27.30596923828125, 64.12237548828125, 43.0634765625, 47.03033447265625, -9.255073547363281, -243.9716796875, 308.25433349609375, 137.19187927246094, 195.21127319335938, 56.995811462402344, 189.99462890625, 114.79193115234375, -12.383781433105469, 223.89755249023438, -38.071571350097656, 143.67578125, 83.3592529296875, 85.40780639648438, 81.44229125976562, 21.43572998046875, 144.1912841796875, -79.15382385253906, 106.41458129882812, 100.71815490722656, 122.919677734375, -89.24549865722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000311.npy"}
{"epoch": 0.6513089005235602, "step": 312, "batch_size": 128, "mean": 60.43415069580078, "std": 116.87911987304688, "min": -291.89434814453125, "p10": -61.279171752929685, "median": 48.950679779052734, "p90": 197.42706298828122, "max": 406.73541259765625, "pos_frac": 0.703125, "sample": [178.2639923095703, 2.2878570556640625, 75.46014404296875, 136.95135498046875, 4.1290283203125, 406.73541259765625, 120.465576171875, -34.46144104003906, 1.7071647644042969, 111.82659912109375, -102.69515991210938, -61.7872314453125, 103.74134826660156, 46.40728759765625, 216.41851806640625, 167.37310791015625, 32.23600769042969, -291.89434814453125, 94.80128479003906, 47.70147705078125, -206.2175750732422, 72.02494049072266, 194.56402587890625, -7.5251922607421875, 144.20980834960938, 166.54806518554688, -67.96917724609375, -18.3447265625, 167.7332763671875, -19.091957092285156, -16.91357421875, 62.80792999267578, 59.321044921875, -268.5655212402344, 251.33599853515625, 93.98797607421875, 129.25408935546875, -6.8431549072265625, -1.8253192901611328, 134.944091796875, 1.57879638671875, 50.19988250732422, 184.93572998046875, 18.80999755859375, -42.112548828125, 10.584701538085938, 7.57940673828125, 45.76556396484375, -82.16749572753906, 66.43902587890625, 4.55279541015625, 91.970703125, 23.285797119140625, 4.047416687011719, 101.3544921875, 178.23587036132812, -73.18878173828125, 0.0, 169.12274169921875, 61.683807373046875, 124.09344482421875, -59.34709167480469, -85.94644927978516, 121.37796020507812, -9.431564331054688, 4.057769775390625, 98.25592041015625, -103.02641296386719, 177.66629028320312, 369.67510986328125, -24.854759216308594, 129.03350830078125, -26.39057159423828, -4.9668731689453125, 61.48919677734375, 204.10748291015625, 58.69187927246094, 4.00848388671875, -19.833816528320312, 267.9007568359375, 13.664264678955078, -6.245990753173828, 32.501708984375, 135.37835693359375, 0.563629150390625, -82.69876098632812, 250.66558837890625, 59.47540283203125, -53.11102294921875, -136.3726806640625, 17.8218994140625, 46.833221435546875, -164.38699340820312, 173.677490234375, -47.80889892578125, 97.13519287109375, -5.14080810546875, 93.06976318359375, 165.56951904296875, 380.32391357421875, 71.77887725830078, 131.08023071289062, 91.08865356445312, 221.15737915039062, -45.69329833984375, -24.62554168701172, 247.87112426757812, 23.112770080566406, 1.729705810546875, -17.312103271484375, 141.30001831054688, -61.061431884765625, 0.7654800415039062, 205.92837524414062, 62.278076171875, 151.90737915039062, 261.9190979003906, 362.50830078125, 160.64535522460938, -32.93505859375, 156.4534454345703, 142.7583465576172, 123.76776123046875, -7.538185119628906, 82.30281066894531, 64.42269897460938, 12.527620315551758, 8.178070068359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000312.npy"}
{"epoch": 0.6534031413612565, "step": 313, "batch_size": 128, "mean": 63.95604705810547, "std": 112.4479751586914, "min": -211.47837829589844, "p10": -87.3532974243164, "median": 59.08818435668945, "p90": 197.98822937011718, "max": 616.4095458984375, "pos_frac": 0.7265625, "sample": [299.0180969238281, 249.979736328125, -41.8726806640625, -30.964431762695312, 73.12835693359375, 84.9949951171875, -83.8165283203125, 176.7913818359375, 69.72225952148438, 103.28213500976562, 17.93255615234375, -95.96856689453125, 12.617691040039062, 199.78265380859375, 108.97978210449219, 17.571382522583008, 76.67432403564453, 9.4031982421875, -22.613983154296875, 144.8645782470703, 33.9739875793457, 221.73065185546875, -110.79995727539062, -133.85317993164062, 21.966888427734375, -98.80572509765625, 58.759674072265625, -92.90769958496094, 47.0185546875, 32.76080322265625, 52.671817779541016, -6.4104461669921875, 5.8309326171875, 14.197860717773438, 49.715972900390625, 105.33041381835938, 59.41669464111328, 10.83843994140625, 234.276123046875, 70.26679992675781, -24.85479736328125, 70.723876953125, -7.341190338134766, 147.53045654296875, -107.499755859375, 35.094085693359375, 28.86334228515625, -0.8152885437011719, 3.8872146606445312, -43.418792724609375, 159.8275909423828, 9.5419921875, 65.05517578125, 140.8291473388672, 31.491764068603516, 197.58355712890625, 58.462799072265625, 118.51241302490234, 39.145294189453125, -18.397247314453125, 129.7681427001953, 109.86466979980469, -6.67706298828125, -211.47837829589844, 191.1015625, -1.12713623046875, -55.892242431640625, -0.838623046875, -110.19931030273438, 186.36297607421875, 252.99713134765625, 63.94578552246094, 114.89828491210938, -6.2147216796875, -110.52362060546875, 69.22976684570312, 196.12913513183594, 72.35104370117188, 0.1117706298828125, -10.30926513671875, 215.11328125, 142.60629272460938, 5.88421630859375, 13.6431884765625, 40.314849853515625, 5.383392333984375, 77.95996856689453, -40.11164855957031, 66.20922088623047, 60.22105407714844, 53.1856689453125, 616.4095458984375, -8.259452819824219, -183.10797119140625, -93.94158935546875, 134.9166259765625, -84.97283935546875, 196.1112060546875, -34.06653594970703, 179.4369659423828, -101.74954986572266, 21.788009643554688, 66.74267578125, -96.59405517578125, 89.8055419921875, 243.4005126953125, 156.94610595703125, 242.8836669921875, 157.8092041015625, -20.86505126953125, 153.38438415527344, 68.66471099853516, 54.770233154296875, 198.93246459960938, -63.75557327270508, 115.75837707519531, 253.88803100585938, 202.293701171875, 134.2078857421875, 134.66574096679688, 109.87252044677734, 149.515869140625, 133.70260620117188, 161.79396057128906, 114.81195068359375, 162.469482421875, 86.8675537109375, 168.22030639648438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000313.npy"}
{"epoch": 0.6554973821989529, "step": 314, "batch_size": 128, "mean": 62.435935974121094, "std": 94.58586120605469, "min": -169.59817504882812, "p10": -45.48243408203124, "median": 54.23576354980469, "p90": 175.3882354736328, "max": 355.98931884765625, "pos_frac": 0.734375, "sample": [34.60572814941406, 183.69650268554688, 157.4864959716797, 67.1717529296875, 0.998138427734375, -40.73350524902344, 136.98789978027344, 70.9080810546875, -42.379669189453125, 178.11126708984375, -5.0321197509765625, 146.48516845703125, 86.4898681640625, -16.58636474609375, 122.57058715820312, 16.906639099121094, -65.30950927734375, -137.5120849609375, -103.33767700195312, 143.17608642578125, 151.50393676757812, 24.101119995117188, 9.11053466796875, 22.4017333984375, 153.01263427734375, 101.23150634765625, 55.83030700683594, 155.6923828125, -82.35406494140625, 29.839263916015625, -2.018157958984375, 355.98931884765625, 114.98764038085938, 57.97381591796875, -52.36994171142578, 89.41128540039062, 114.97116088867188, 74.38572692871094, 74.8572998046875, 258.5638732910156, 150.32339477539062, -52.04400634765625, 15.741943359375, -11.432342529296875, 24.339675903320312, 143.3948974609375, 23.18193817138672, 55.449493408203125, 29.415283203125, 16.30950927734375, 66.46534729003906, 19.3428955078125, 84.08561706542969, 19.014724731445312, -40.55572509765625, -169.59817504882812, 0.0, 157.15109252929688, 15.70294189453125, 95.0115966796875, 141.1751708984375, -18.91187286376953, 21.63036346435547, 123.62066650390625, -20.526824951171875, -51.74485778808594, -163.5723419189453, 154.66531372070312, -34.600341796875, 1.0814361572265625, 10.8358154296875, 154.32635498046875, -32.23771667480469, 40.28761291503906, 96.731689453125, 115.21221923828125, 62.814422607421875, 10.5069580078125, 49.98048400878906, 2.905426025390625, 79.97079467773438, 167.61032104492188, -23.9443359375, 9.316375732421875, 48.338165283203125, -24.984527587890625, 174.22122192382812, 112.91061401367188, 172.7962646484375, 68.56333923339844, 118.55523681640625, 53.02203369140625, 38.636932373046875, 22.807037353515625, -18.640045166015625, 108.74343872070312, 291.6044006347656, 164.06649780273438, 182.0390625, -104.81735229492188, 17.52970314025879, 81.68023681640625, 202.24624633789062, 66.15807342529297, 45.199615478515625, 244.39549255371094, 197.4876708984375, -42.79853820800781, 164.6759796142578, 165.0341796875, 139.5940704345703, -0.81390380859375, -10.486846923828125, 214.46414184570312, -55.575225830078125, -81.00146484375, 80.02828979492188, 96.68006134033203, 237.306640625, -1.8717575073242188, 151.05349731445312, 45.2410888671875, -31.8101806640625, 242.01097106933594, -73.99801635742188, 196.84732055664062, 141.7367706298828, -27.335002899169922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000314.npy"}
{"epoch": 0.6575916230366492, "step": 315, "batch_size": 128, "mean": 72.77225494384766, "std": 102.24067687988281, "min": -162.53128051757812, "p10": -38.266117858886716, "median": 55.79637145996094, "p90": 205.8671401977539, "max": 305.5400085449219, "pos_frac": 0.7421875, "sample": [-32.480133056640625, -15.656845092773438, 10.644168853759766, -19.686996459960938, 80.57098388671875, 238.60107421875, 100.07957458496094, 209.93997192382812, 56.1959228515625, 197.76025390625, 81.18258666992188, -27.553733825683594, -41.9921875, 58.4483642578125, 99.53688049316406, -70.31504821777344, 193.39590454101562, 218.11114501953125, 51.53912353515625, 205.59324645996094, 142.4822998046875, 1.267465591430664, -115.91656494140625, 185.42059326171875, 258.34246826171875, 9.01715087890625, 50.45697021484375, 206.5062255859375, 87.92762756347656, 246.67959594726562, 34.78302001953125, 95.36874389648438, -150.99078369140625, -29.0595703125, -57.1796875, 32.07466125488281, 160.47821044921875, 163.4307861328125, 34.26295471191406, -16.89044189453125, -31.864370346069336, -10.615570068359375, 7.70947265625, -37.83219909667969, 283.36749267578125, 190.36123657226562, 168.97610473632812, 242.38095092773438, -12.890533447265625, 184.500244140625, -162.01614379882812, 145.0419464111328, 26.142913818359375, 56.296356201171875, 125.31741333007812, 45.81627655029297, 139.2978515625, 29.458572387695312, 40.86311340332031, 123.24932861328125, 204.22900390625, -9.71099853515625, 52.40008544921875, 116.31126403808594, 185.00555419921875, -44.211029052734375, 55.396820068359375, -20.188232421875, -7.820953369140625, -39.278594970703125, 33.86859130859375, 286.4045715332031, 196.0968017578125, 40.60528564453125, 111.47518920898438, 182.95095825195312, 33.303497314453125, -4.3585357666015625, 165.06210327148438, 106.1082534790039, 305.5400085449219, 157.15997314453125, 136.0538330078125, 125.3858642578125, -10.382736206054688, -88.80628967285156, -31.386627197265625, 233.21044921875, 41.59405517578125, 186.8287353515625, 23.188003540039062, -16.15679931640625, 84.71585083007812, -17.366012573242188, 172.5794677734375, 119.241943359375, 181.69607543945312, 44.47705078125, 34.970611572265625, -27.17120361328125, 11.431472778320312, 4.6372222900390625, 37.719970703125, -130.44540405273438, 181.36932373046875, 98.57487487792969, 67.25382995605469, 76.149169921875, 75.75689697265625, 41.29949951171875, -43.53725814819336, 12.161697387695312, -147.41586303710938, 136.577880859375, 8.437013626098633, 224.37030029296875, 20.749807357788086, 116.8468017578125, 99.11367797851562, 13.45925521850586, -162.53128051757812, -30.984954833984375, 118.0999755859375, 216.1140594482422, 161.010009765625, 197.27706909179688, 101.66238403320312, 24.733097076416016], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000315.npy"}
{"epoch": 0.6596858638743456, "step": 316, "batch_size": 128, "mean": 69.67374420166016, "std": 103.41529846191406, "min": -138.4945526123047, "p10": -52.178410339355466, "median": 57.1667366027832, "p90": 217.93629150390626, "max": 347.76141357421875, "pos_frac": 0.7421875, "sample": [217.90618896484375, 6.816249847412109, 38.83624267578125, 83.805419921875, -55.16497802734375, 116.27812957763672, 127.31317138671875, 14.0216064453125, 40.851654052734375, -138.4945526123047, 79.55120849609375, 185.20803833007812, 0.81866455078125, 23.218338012695312, -53.5078125, 1.886749267578125, 136.9423828125, 14.54119873046875, -102.21807861328125, 347.76141357421875, 159.20556640625, 44.772186279296875, -16.749099731445312, 220.41595458984375, 232.07839965820312, 146.91720581054688, 187.404296875, -51.71144104003906, 235.90199279785156, -53.26800537109375, 127.25840759277344, 78.64422607421875, 59.63214111328125, -18.372833251953125, -15.621673583984375, 292.5024719238281, 58.470008850097656, 185.82275390625, 129.33132934570312, 60.40785217285156, 270.9608459472656, 209.22012329101562, -45.335296630859375, -33.753990173339844, -124.31756591796875, -95.16357421875, -33.566680908203125, 117.49591064453125, 55.86346435546875, 155.709716796875, 68.75567626953125, -38.59538269042969, 69.60626220703125, 11.311370849609375, -33.67623519897461, 22.578109741210938, 6.165498733520508, 75.3314208984375, 106.916259765625, 240.8621826171875, -39.96615982055664, -92.42876434326172, 110.811767578125, 30.385528564453125, -0.1988525390625, 211.66091918945312, -46.898406982421875, -82.22686767578125, 1.775360107421875, 107.32347106933594, 47.5633544921875, 119.78280639648438, 125.72911071777344, 218.00653076171875, 66.1614990234375, 183.7843017578125, 53.12974548339844, 41.51359558105469, 46.35382080078125, -8.540817260742188, -17.32562255859375, 41.109375, 44.393890380859375, 89.06585693359375, -95.36593627929688, 109.15377807617188, 240.53555297851562, 12.201095581054688, -95.68963623046875, 164.20849609375, -126.70242309570312, 40.1470947265625, 98.53005981445312, 24.103370666503906, 38.17475128173828, 71.22103881835938, 64.32086181640625, -26.382423400878906, -48.452880859375, 137.62454223632812, 10.882972717285156, 238.3983154296875, -8.670677185058594, 128.6112060546875, 131.90103149414062, 315.1776123046875, -37.93913269042969, 165.6588134765625, 3.8712081909179688, 135.72698974609375, 195.4249267578125, 255.54290771484375, 52.646728515625, 191.87930297851562, 36.39091491699219, 99.41059875488281, 97.83074188232422, 66.32667541503906, 277.17913818359375, 31.103271484375, 0.0, -88.70980834960938, 61.7452392578125, 183.6060791015625, 161.62313842773438, 29.715850830078125, 186.9544677734375, -18.423233032226562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000316.npy"}
{"epoch": 0.6617801047120419, "step": 317, "batch_size": 128, "mean": 65.56678009033203, "std": 84.48687744140625, "min": -197.56997680664062, "p10": -26.697511672973615, "median": 58.16657257080078, "p90": 178.92794189453124, "max": 273.44189453125, "pos_frac": 0.7734375, "sample": [77.62509155273438, -95.64118957519531, 0.0, 21.51028060913086, 224.33306884765625, -21.23895263671875, 8.9949951171875, 69.08795166015625, 23.632400512695312, 89.73745727539062, 117.02838134765625, 41.850257873535156, 46.46929168701172, 0.0, 16.011985778808594, 111.90826416015625, 0.0, 32.467803955078125, 59.361114501953125, 56.97203063964844, 23.5521240234375, 77.92202758789062, 76.73178100585938, 200.24993896484375, 22.553070068359375, 158.76177978515625, 150.22377014160156, 78.59656524658203, 166.41070556640625, 13.010238647460938, 89.857421875, 117.70489501953125, 192.12847900390625, -39.65758514404297, 213.99623107910156, 143.08526611328125, 119.8081283569336, 214.4955291748047, 66.28651428222656, -95.2122802734375, 137.72479248046875, 30.25018310546875, 170.2005157470703, 22.556514739990234, 96.61358642578125, 234.05963134765625, -66.88456726074219, -21.910659790039062, 97.35459899902344, 179.10174560546875, 88.97181701660156, -11.302536010742188, -47.47149658203125, 126.60264587402344, 148.49655151367188, 125.9918212890625, 38.2469482421875, 26.24615478515625, 59.787925720214844, 64.16656494140625, 60.885955810546875, -2.3837451934814453, 183.7757568359375, 51.33233642578125, 97.57087707519531, 178.85345458984375, 24.51092529296875, 72.44514465332031, 74.11766052246094, -21.110275268554688, 236.06405639648438, -52.765625, 6.783607482910156, 93.66865539550781, 20.49156951904297, -0.14121437072753906, 40.66255187988281, 69.28347778320312, 22.384185791015625, 35.82208251953125, -15.179428100585938, -197.56997680664062, 237.30426025390625, 273.44189453125, -110.13717651367188, 34.67572021484375, 51.48004150390625, 82.869384765625, 135.69729614257812, 119.26023864746094, -9.692779541015625, 121.55207824707031, 10.044633865356445, 68.31367492675781, -68.23452758789062, 142.01881408691406, 144.617919921875, 15.803939819335938, 118.12777709960938, -89.02264404296875, -44.979400634765625, 229.4869384765625, 176.2503662109375, 169.29248046875, 0.2344970703125, 30.5318603515625, -11.122735977172852, 30.24371337890625, 82.86630249023438, 132.82998657226562, 156.76605224609375, 7.382411956787109, 185.80810546875, -64.06414794921875, 41.82487487792969, -37.8668327331543, -1.0944194793701172, -6.501556396484375, 89.53956604003906, 35.851348876953125, 133.6441650390625, 42.753997802734375, 53.05735778808594, -0.8393077850341797, 21.47356414794922, 166.83670043945312, -15.521072387695312, 160.8232879638672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000317.npy"}
{"epoch": 0.6638743455497382, "step": 318, "batch_size": 128, "mean": 72.29960632324219, "std": 93.54304504394531, "min": -155.30294799804688, "p10": -50.788081359863284, "median": 74.47203063964844, "p90": 208.89883422851562, "max": 296.99560546875, "pos_frac": 0.7578125, "sample": [111.91697692871094, 84.3798828125, 42.64423370361328, 73.663818359375, -2.8583984375, 30.919509887695312, -32.17347717285156, -26.59234619140625, 137.35604858398438, 16.10675048828125, 51.86131286621094, -26.641326904296875, 6.3153076171875, 99.88128662109375, -80.09771728515625, 117.68142700195312, -65.05448913574219, 52.57275390625, 80.74102783203125, 72.48941040039062, 177.622314453125, 24.718460083007812, 230.35775756835938, -67.187744140625, 95.89366149902344, 36.29022216796875, 223.02752685546875, 140.5306854248047, 84.23248291015625, 117.10931396484375, 7.677986145019531, 250.3812713623047, 12.150299072265625, 161.41732788085938, 30.68487548828125, 84.62472534179688, 3.242168426513672, 66.76974487304688, 208.43621826171875, 173.87490844726562, -29.12933349609375, -57.38037109375, 103.12994384765625, 145.16830444335938, 82.0323486328125, 143.12435913085938, 136.78912353515625, 174.130859375, 256.82275390625, -64.06761169433594, 116.03860473632812, 99.56256103515625, 116.33736419677734, 228.40750122070312, 242.11679077148438, 93.0003662109375, 80.85305786132812, -24.8602294921875, 75.78457641601562, 165.88543701171875, 116.1185302734375, 209.978271484375, -43.697479248046875, 125.42166137695312, 296.99560546875, 238.14984130859375, 17.377593994140625, -41.60593032836914, 33.75860595703125, 138.15240478515625, 256.417724609375, 140.3936767578125, -30.32373046875, 55.312347412109375, 149.0430908203125, 111.22561645507812, -2.7205657958984375, 142.25575256347656, 189.8173370361328, -8.07708740234375, -1.377105712890625, 105.53048706054688, 161.99188232421875, 118.57421875, -50.90203857421875, 176.943359375, 94.63482666015625, 39.14630126953125, -71.11849212646484, -20.023712158203125, 6.663116455078125, 170.17788696289062, -108.488037109375, -16.111968994140625, -32.82814025878906, 94.98739624023438, 166.11361694335938, 18.623775482177734, 229.42205810546875, -71.30804443359375, 25.169692993164062, -2.03271484375, -98.88673400878906, 15.103729248046875, -56.11714172363281, 149.14520263671875, 73.47604370117188, -78.9359130859375, 57.738162994384766, 40.58587646484375, 263.06182861328125, 67.75367736816406, 2.8861236572265625, 131.5443572998047, 78.77178955078125, 8.084320068359375, 10.608062744140625, -50.73924255371094, 220.33963012695312, 75.28024291992188, 68.50726318359375, 102.33966827392578, 79.8121337890625, 72.8436279296875, 156.6476593017578, 1.308746337890625, -155.30294799804688, 0.0], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000318.npy"}
{"epoch": 0.6659685863874345, "step": 319, "batch_size": 128, "mean": 69.97792053222656, "std": 89.26567840576172, "min": -163.11056518554688, "p10": -32.297448730468744, "median": 63.48811721801758, "p90": 177.62979125976562, "max": 309.0163269042969, "pos_frac": 0.7578125, "sample": [149.07102966308594, -21.597856521606445, 165.7349853515625, 7.208984375, 105.73147583007812, 41.79901885986328, 133.09906005859375, 126.67253112792969, -11.44268798828125, 0.0, 3.40985107421875, 89.2003173828125, -27.8634033203125, 170.5029296875, 95.91207885742188, 201.84835815429688, 84.43439483642578, -40.819358825683594, 179.56756591796875, 1.0032806396484375, 63.219970703125, 166.64361572265625, -30.537933349609375, 174.714599609375, 62.59367370605469, 137.88858032226562, 190.1304931640625, 265.8311462402344, -79.71537017822266, 198.08538818359375, -134.25738525390625, 158.32965087890625, 63.756263732910156, 237.96914672851562, 176.79931640625, -48.902099609375, 3.8163299560546875, 309.0163269042969, 130.90567016601562, -25.84967803955078, 18.640491485595703, 60.25190734863281, 81.45487976074219, -0.9884109497070312, 109.77723693847656, -36.402984619140625, 81.72991943359375, 57.473968505859375, 81.52078247070312, 107.11337280273438, -42.15648651123047, 106.43710327148438, 52.45135498046875, 9.88775634765625, 141.9832305908203, 114.51080322265625, 53.23846435546875, -6.7389984130859375, 204.103515625, 266.739501953125, 85.01229858398438, 159.11260986328125, 11.710548400878906, 34.80857849121094, 289.2920227050781, 175.7869873046875, -9.38824462890625, -6.7515411376953125, 41.89427185058594, 109.82234191894531, 152.9752197265625, -119.5728759765625, 111.65719604492188, 150.19338989257812, 7.73126220703125, 8.047073364257812, 29.406036376953125, -3.9443740844726562, 46.693023681640625, 131.8925323486328, 191.08590698242188, 85.99688720703125, -28.801422119140625, -41.409942626953125, 30.508956909179688, 67.53681945800781, 141.7893524169922, 2.16265869140625, -22.316131591796875, -54.89508056640625, 2.4886474609375, 124.95819091796875, 190.3033447265625, 35.434722900390625, 111.24927520751953, -2.3356781005859375, 28.802337646484375, 119.140869140625, 161.5089111328125, 124.35415649414062, 16.521286010742188, 45.0606689453125, 101.02398681640625, 66.75375366210938, -1.9205322265625, 83.73480987548828, 22.54534912109375, -50.858375549316406, -163.11056518554688, 80.15316772460938, 105.84640502929688, -44.764556884765625, 51.91424560546875, 173.87591552734375, 248.0806884765625, -92.34848022460938, 159.25225830078125, -21.411949157714844, 112.93634033203125, -8.012712478637695, -1.221282958984375, 31.73602294921875, 165.8024139404297, 12.881118774414062, 38.721221923828125, 75.54095458984375, 51.09056091308594, 88.4728012084961], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000319.npy"}
{"epoch": 0.6680628272251309, "step": 320, "batch_size": 128, "mean": 69.360595703125, "std": 106.89678955078125, "min": -220.48593139648438, "p10": -49.0769287109375, "median": 52.58929443359375, "p90": 211.44215087890623, "max": 368.04986572265625, "pos_frac": 0.7421875, "sample": [138.74197387695312, 6.272638320922852, -41.549163818359375, -220.48593139648438, 218.2120361328125, 137.22288513183594, 24.433837890625, 193.7705078125, -41.5333251953125, 9.213817596435547, 13.17864990234375, 26.971412658691406, 109.4632568359375, 133.74319458007812, -51.982818603515625, -3.23089599609375, 157.27374267578125, -26.994430541992188, 223.91299438476562, -27.8621826171875, 46.35406494140625, 48.96381378173828, -58.25238037109375, 126.58139038085938, 166.88558959960938, -0.2371978759765625, -3.7821826934814453, 31.50555419921875, -145.8943328857422, -1.9290542602539062, 336.8874206542969, 79.6236572265625, 170.74713134765625, 194.16476440429688, 0.17144775390625, 21.728713989257812, -0.034912109375, 68.07159423828125, 74.88525390625, 3.7032546997070312, 42.523399353027344, 182.77154541015625, 191.77880859375, -2.23126220703125, -83.91973876953125, 5.267791748046875, 163.90701293945312, 30.56182861328125, 229.9967041015625, 230.20274353027344, 46.56959533691406, -13.5235595703125, 303.12066650390625, -47.042083740234375, 172.5399169921875, 142.4151153564453, -49.82142639160156, 91.65478515625, 14.861129760742188, 79.33160400390625, 204.14712524414062, -9.097503662109375, 107.58174133300781, 67.07609558105469, 251.18197631835938, 180.94598388671875, 67.147705078125, 105.07373046875, -0.36112022399902344, 75.28353881835938, 148.00759887695312, -90.3702163696289, 29.449195861816406, -105.53439331054688, 168.10693359375, 126.20536804199219, 54.39080810546875, 44.4488525390625, 67.82206726074219, 184.5875244140625, 132.60479736328125, 1.693603515625, -158.24749755859375, 73.96542358398438, 49.39892578125, 214.3411865234375, 161.72225952148438, -102.7267837524414, 129.63601684570312, -4.736381530761719, 71.2489013671875, 8.36138916015625, -15.895526885986328, 9.13934326171875, 46.15348815917969, -120.97718811035156, -16.1756591796875, 31.986297607421875, 210.27194213867188, 317.81500244140625, 214.17263793945312, 269.70989990234375, 50.78778076171875, 83.63247680664062, 11.404800415039062, -3.931222915649414, 118.07962036132812, 124.96295166015625, 103.22183227539062, 119.48809814453125, 46.6651611328125, 219.072509765625, 44.068084716796875, 164.378173828125, 65.1474609375, 116.09416198730469, 76.84767150878906, -136.83261108398438, 0.8516693115234375, 167.4847412109375, 135.07952880859375, -22.11846923828125, 33.47694396972656, -48.75785827636719, 368.04986572265625, 4.900764465332031, -139.575439453125, 106.2430419921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000320.npy"}
{"epoch": 0.6701570680628273, "step": 321, "batch_size": 128, "mean": 47.380645751953125, "std": 97.62854766845703, "min": -145.1781768798828, "p10": -60.40941314697265, "median": 30.380592346191406, "p90": 192.78553924560546, "max": 392.9804992675781, "pos_frac": 0.6875, "sample": [147.27261352539062, -129.39456176757812, 71.61256408691406, -133.6690673828125, -8.919563293457031, -58.651458740234375, 133.95367431640625, -140.6160888671875, -2.0860824584960938, 197.0155792236328, 213.47964477539062, 21.137496948242188, 175.344970703125, 22.444015502929688, 73.79542541503906, 59.64044189453125, 132.6902313232422, 205.93409729003906, 12.459136962890625, 29.580810546875, 2.09100341796875, 84.2608642578125, 158.17083740234375, 86.2080078125, -16.9951171875, -91.4920654296875, 55.52777099609375, 152.25119018554688, 20.84047508239746, -26.310256958007812, -36.711021423339844, 214.464111328125, 19.8480224609375, 204.52273559570312, -35.56494140625, -127.76910400390625, -36.92816162109375, 31.60511016845703, 22.282928466796875, 74.92779541015625, 41.290504455566406, -100.8065185546875, -50.58863067626953, -41.81072998046875, -19.540565490722656, 120.67315673828125, 0.377288818359375, -11.82391357421875, 7.5053863525390625, -81.56798553466797, 7.8282318115234375, 92.43048095703125, 12.404754638671875, -11.40472412109375, -17.854766845703125, 31.180374145507812, 4.777130126953125, 128.17721557617188, 20.807693481445312, 230.688232421875, -145.1781768798828, 160.14144897460938, 60.982177734375, 58.92699432373047, -58.367767333984375, 184.91177368164062, 152.41897583007812, 55.79109191894531, 39.41563415527344, 23.64935302734375, -27.46490478515625, 31.63397216796875, 222.34405517578125, 63.043548583984375, 13.957138061523438, 112.99508666992188, 195.36087036132812, -14.76235580444336, 100.71611022949219, -28.477081298828125, -29.450515747070312, 166.7762451171875, -53.85890197753906, 52.50224304199219, 15.58026123046875, 68.56143188476562, -18.648517608642578, 20.3087215423584, -116.19806671142578, 142.81863403320312, -10.251705169677734, 10.686712265014648, 33.1473388671875, 138.51028442382812, 136.8115997314453, 8.814224243164062, -36.54011535644531, 34.684295654296875, 194.5980987548828, 54.57867431640625, -75.4378662109375, 218.57794189453125, 6.8428192138671875, 93.2419204711914, 54.662109375, -107.6109619140625, 77.14774322509766, 153.33132934570312, 35.915321350097656, 11.107414245605469, 54.62837219238281, 166.99493408203125, 192.00872802734375, 392.9804992675781, 58.765045166015625, 83.94953918457031, 26.377952575683594, -50.250701904296875, -64.51130676269531, -38.7774658203125, -10.268775939941406, 83.96994018554688, -81.48445129394531, 290.5147705078125, 245.55426025390625, 85.37646484375, 0.0, 4.6512451171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000321.npy"}
{"epoch": 0.6722513089005235, "step": 322, "batch_size": 128, "mean": 61.41636276245117, "std": 99.42850494384766, "min": -229.96527099609375, "p10": -44.0462890625, "median": 45.232749938964844, "p90": 197.77432861328123, "max": 366.350341796875, "pos_frac": 0.703125, "sample": [-229.96527099609375, 53.1807861328125, -18.82525634765625, 148.38067626953125, -0.0924072265625, -22.662506103515625, 41.840232849121094, 49.725250244140625, 77.70840454101562, 16.63330078125, -6.913276672363281, 171.41775512695312, 196.1483154296875, -16.36667823791504, 44.66859436035156, -32.845733642578125, 17.74359130859375, 80.71578979492188, 43.21527099609375, 133.486328125, 4.61749267578125, 41.196441650390625, -177.9276123046875, 161.781494140625, 256.3395080566406, 4.671092987060547, 44.18707275390625, 267.568115234375, 74.03547668457031, 75.4735107421875, -83.53668212890625, 9.13275146484375, 37.7969970703125, 17.03302001953125, 87.18626403808594, -114.40625, 115.21133422851562, 175.78485107421875, -32.73968505859375, 169.1285400390625, 112.63009643554688, -66.14236450195312, 173.371337890625, -25.937774658203125, 182.69955444335938, 299.8519592285156, -38.135772705078125, 127.30633544921875, 49.21879196166992, 22.001998901367188, 207.09539794921875, 251.133544921875, 21.29815673828125, -33.91459655761719, 52.87213134765625, 104.14591979980469, -16.273239135742188, 52.191436767578125, 133.6182861328125, -26.166259765625, -27.559783935546875, -0.584930419921875, 101.88623046875, 44.272483825683594, 140.0605010986328, 201.568359375, -43.773406982421875, 14.488433837890625, 137.80728149414062, 165.47451782226562, 22.1768798828125, -37.68743896484375, -48.46641540527344, 67.97138214111328, -22.304779052734375, 105.54463958740234, 52.23072052001953, -46.27313232421875, 70.11614990234375, 5.827812194824219, 59.8336181640625, 81.01220703125, 16.507904052734375, 0.0, 125.9351806640625, 26.799560546875, -13.682205200195312, 319.3490905761719, 41.435516357421875, 74.17903137207031, 212.657958984375, -68.64630126953125, 88.33514404296875, 260.9537658691406, 123.10041809082031, 34.173583984375, 186.69512939453125, 26.54913330078125, 95.41270446777344, 140.8908233642578, 90.86227416992188, -44.683013916015625, 63.88075256347656, -54.160064697265625, 71.59825134277344, 140.70809936523438, 7.8782501220703125, 58.24498748779297, 180.54290771484375, -34.61859130859375, 205.21646118164062, 222.66729736328125, -86.4293212890625, 73.59027099609375, 45.796905517578125, -64.29597473144531, 205.56951904296875, 10.503570556640625, -25.39544677734375, -2.8943328857421875, -1.3996124267578125, -68.821044921875, -9.88372802734375, -19.654296875, 35.2003173828125, 366.350341796875, 146.47024536132812, 151.5897979736328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000322.npy"}
{"epoch": 0.6743455497382199, "step": 323, "batch_size": 128, "mean": 46.31390380859375, "std": 101.29764556884766, "min": -164.83946228027344, "p10": -72.83125991821288, "median": 33.903968811035156, "p90": 178.86290435791014, "max": 434.22467041015625, "pos_frac": 0.65625, "sample": [26.003585815429688, 37.2816162109375, -143.1173095703125, -55.273284912109375, -23.132347106933594, 101.15506744384766, 2.2905502319335938, 62.63720703125, 179.95399475097656, -62.5244140625, -80.73257446289062, -57.43304443359375, 85.59234619140625, -6.663177490234375, 11.422998428344727, 33.07170867919922, 178.39529418945312, 24.422805786132812, 105.00936889648438, -49.209110260009766, 25.754165649414062, -4.3336029052734375, 120.07498168945312, -45.6431884765625, 190.98385620117188, -79.501953125, 10.850971221923828, -60.705474853515625, 145.5636444091797, 291.9651794433594, -70.27278900146484, -107.09906005859375, 144.32815551757812, 52.46452331542969, 44.836273193359375, -15.705894470214844, 57.26008605957031, -8.443470001220703, 107.96237182617188, 125.86888122558594, 158.06939697265625, 149.30715942382812, -69.30197143554688, 120.83697509765625, -93.69430541992188, 14.361953735351562, 58.877227783203125, 52.20489501953125, -134.90765380859375, 137.46282958984375, 34.736228942871094, 17.934587478637695, -5.378265380859375, 3.89483642578125, -46.077392578125, 68.14932250976562, 46.40997314453125, 250.0797119140625, -156.2679443359375, 149.23834228515625, 29.577438354492188, 16.3466796875, 58.55011749267578, -11.088798522949219, 86.69049072265625, 227.97586059570312, -61.77947998046875, 163.9336700439453, 64.36077880859375, 80.3441162109375, -9.21319580078125, 256.3090515136719, 114.09425354003906, 198.07763671875, 61.0771484375, -7.272747039794922, 86.40435791015625, 94.04757690429688, 167.79745483398438, -10.058258056640625, 108.93812561035156, -3.67529296875, 180.44151306152344, 66.35781860351562, -33.59869384765625, -12.340682983398438, 31.2342529296875, -18.450836181640625, -118.5413818359375, 24.065549850463867, 155.67481994628906, 104.69976806640625, 13.253244400024414, 11.900421142578125, 7.556793212890625, 209.84857177734375, 80.00885009765625, -34.59075927734375, -67.18820190429688, 39.46831512451172, 182.86822509765625, 10.398773193359375, -25.510879516601562, -88.64926147460938, -164.83946228027344, 121.03369140625, -34.18011474609375, -118.150390625, 193.092529296875, 81.99615478515625, 160.24786376953125, -44.40655517578125, 164.35366821289062, -7.7149810791015625, 101.86775207519531, 3.379852294921875, 100.66693115234375, 434.22467041015625, 66.26727294921875, -11.284088134765625, 100.63723754882812, 60.729827880859375, 48.354766845703125, -141.8632354736328, 148.16354370117188, -78.801025390625, 22.686134338378906, 240.07928466796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000323.npy"}
{"epoch": 0.6764397905759162, "step": 324, "batch_size": 128, "mean": 75.23384857177734, "std": 90.66046905517578, "min": -173.30538940429688, "p10": -19.62910919189453, "median": 55.173988342285156, "p90": 197.85646514892576, "max": 273.01593017578125, "pos_frac": 0.8046875, "sample": [273.01593017578125, 258.6240234375, 20.80291748046875, 106.24507904052734, 226.9305877685547, 241.50250244140625, 47.018394470214844, 180.1593475341797, 176.53143310546875, 29.50802230834961, -173.30538940429688, 0.0, 108.7125244140625, -66.621337890625, -22.290115356445312, 91.45108795166016, 7.438423156738281, -17.027013778686523, 176.7750244140625, 80.0123291015625, 147.62835693359375, 135.3023681640625, 85.00300598144531, 19.31011962890625, 266.7518310546875, -6.715457916259766, 86.11734008789062, 51.58221435546875, 13.088821411132812, 5.23284912109375, -1.73431396484375, 209.09043884277344, 33.35687255859375, 116.147705078125, 19.11737060546875, 101.37708282470703, 7.9696502685546875, 81.62396240234375, 103.9373779296875, 0.0, 40.28546142578125, -5.2467041015625, 147.32247924804688, 7.959148406982422, 34.545654296875, 3.22186279296875, 93.08193969726562, 59.48065185546875, 41.453094482421875, -22.556602478027344, 185.77716064453125, 45.76628112792969, -59.581817626953125, -144.5362091064453, 44.21733093261719, 11.468856811523438, 182.076171875, 216.02536010742188, 119.47830200195312, 160.4554443359375, 175.63677978515625, 177.6568603515625, -0.48439979553222656, 141.21359252929688, 195.1757049560547, 79.87060546875, -87.20834350585938, 3.6777725219726562, 33.641822814941406, 126.9075927734375, 259.0966491699219, 141.37680053710938, 23.032184600830078, 96.88980102539062, 56.9688720703125, -72.35762023925781, 66.2236328125, 154.57601928710938, 155.94473266601562, 111.05926513671875, -42.84526062011719, 146.41363525390625, 7.8227386474609375, 56.17791748046875, -36.65533447265625, 167.23150634765625, 222.65574645996094, 175.36871337890625, 8.95904541015625, -16.98260498046875, 185.830322265625, 97.76052856445312, 54.17005920410156, 2.7112045288085938, 28.96728515625, 167.87591552734375, -18.488677978515625, 204.111572265625, 139.18161010742188, 135.47415161132812, 53.781829833984375, 99.68571472167969, 0.0, -25.110641479492188, 80.613037109375, 1.6874847412109375, -0.17371368408203125, 84.47062683105469, -31.71566390991211, 241.84918212890625, 16.37078857421875, 194.5878448486328, 46.34830093383789, 34.198760986328125, 13.776039123535156, 250.1759033203125, 7.68658447265625, 2.68621826171875, 253.84759521484375, 48.50666046142578, -65.92060089111328, 140.7265625, -11.757408142089844, 103.26693725585938, 48.123565673828125, 38.476898193359375, 62.4483642578125, 36.29168701171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000324.npy"}
{"epoch": 0.6785340314136126, "step": 325, "batch_size": 128, "mean": 64.68997192382812, "std": 108.16238403320312, "min": -285.3504638671875, "p10": -50.970169067382805, "median": 41.00568771362305, "p90": 202.06458587646483, "max": 381.8089599609375, "pos_frac": 0.7109375, "sample": [90.9876708984375, 116.94308471679688, -41.816925048828125, -42.66119384765625, 16.069507598876953, 56.23832702636719, 115.23309326171875, 105.72914123535156, 183.01393127441406, 144.91046142578125, 115.80075073242188, 359.1861572265625, 181.76412963867188, -7.760162353515625, 8.117141723632812, -7.27410888671875, 113.77278137207031, 62.30778503417969, -19.163116455078125, -70.21371459960938, -34.45287322998047, 154.67532348632812, 43.992095947265625, -14.846445083618164, -62.80009460449219, -71.8941650390625, -2.421051025390625, 325.77392578125, 104.40010833740234, 44.281982421875, 0.8830223083496094, 179.38990783691406, 70.05169677734375, 45.239013671875, 61.430999755859375, 190.9124755859375, 145.57106018066406, 242.71307373046875, 22.323776245117188, -35.930633544921875, 148.17636108398438, 266.511474609375, 48.60401916503906, -5.522050857543945, 166.95245361328125, 28.209320068359375, 17.883041381835938, 22.568649291992188, 217.42529296875, -61.2598876953125, -3.6064453125, 40.37293243408203, 176.448486328125, 43.923919677734375, 109.68338012695312, 188.56317138671875, -62.055145263671875, 104.056396484375, 200.89642333984375, 225.24066162109375, 166.45199584960938, 266.27972412109375, -108.29165649414062, 38.23180389404297, 111.14324951171875, 11.263641357421875, 27.77789306640625, 7.232879638671875, 27.407760620117188, 9.16241455078125, 204.79029846191406, 15.816398620605469, 131.27651977539062, 90.19171142578125, 15.884963989257812, 85.43417358398438, -28.491851806640625, -64.17012023925781, 38.60284423828125, -43.8231201171875, 381.8089599609375, 36.84382629394531, -48.0467529296875, 15.328216552734375, 148.51327514648438, -17.792648315429688, 41.63844299316406, 124.69425964355469, 120.38296508789062, -57.791473388671875, -13.71942138671875, 77.055419921875, 11.118438720703125, -33.135955810546875, 223.46096801757812, -26.747161865234375, 193.418212890625, 145.84219360351562, 86.42831420898438, 21.629451751708984, -285.3504638671875, -4.812164306640625, 253.9617919921875, -179.49485778808594, 247.66778564453125, 151.31674194335938, 95.98184967041016, 1.0454254150390625, 14.158538818359375, 190.63546752929688, -0.0930633544921875, -29.315399169921875, -1.2912445068359375, -2.2538070678710938, 19.7965087890625, 74.39590454101562, 11.278045654296875, -15.89056396484375, -66.67041015625, 200.5797119140625, 215.02175903320312, 15.461563110351562, 2.608884811401367, -149.19381713867188, -103.53900146484375, 199.57583618164062, 99.621826171875, 128.46290588378906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000325.npy"}
{"epoch": 0.680628272251309, "step": 326, "batch_size": 128, "mean": 62.152557373046875, "std": 98.97278594970703, "min": -179.15948486328125, "p10": -71.87187728881835, "median": 57.56327819824219, "p90": 176.6871841430664, "max": 311.42828369140625, "pos_frac": 0.734375, "sample": [91.14340209960938, 174.1100616455078, 24.684066772460938, 311.42828369140625, 60.184967041015625, 169.19345092773438, -67.8382568359375, -62.780548095703125, 30.039722442626953, 254.565185546875, 292.6766052246094, 49.81483459472656, -43.1409912109375, 254.2432861328125, 67.2939453125, 94.2806396484375, 0.0, -96.41705322265625, 19.60558319091797, 4.92486572265625, 138.04359436035156, 63.3944091796875, 52.51251220703125, 104.45846557617188, 8.917388916015625, 239.00669860839844, -44.174591064453125, 55.13531494140625, -77.913330078125, -70.68036651611328, 125.87734985351562, -121.3006591796875, 38.273406982421875, 197.76174926757812, -74.65206909179688, -5.922149658203125, 37.583065032958984, 37.10786437988281, -0.832489013671875, 114.18930053710938, 97.17532348632812, 214.37081909179688, 41.09953308105469, -76.28146362304688, 240.14285278320312, 70.71033477783203, 97.392333984375, -116.8300552368164, 18.89519500732422, 23.91998291015625, 32.76091003417969, 157.85455322265625, 106.10392761230469, 0.0, 135.51437377929688, 101.57742309570312, 51.376487731933594, 124.44345092773438, -66.87930297851562, -85.76925659179688, 182.70046997070312, -179.15948486328125, 172.51986694335938, 100.76305389404297, 148.59423828125, -129.03668212890625, 112.8394775390625, 28.259857177734375, 57.39251708984375, -28.980667114257812, -8.830169677734375, 151.85244750976562, 153.58029174804688, 15.276321411132812, 196.15394592285156, 134.24212646484375, -54.08038330078125, 9.040130615234375, 152.67190551757812, -14.123306274414062, 150.02703857421875, 160.87307739257812, 90.94656372070312, -51.192413330078125, 105.69705200195312, 109.61854553222656, -33.07763671875, 153.2814483642578, 167.64114379882812, 2.0301151275634766, 86.89130401611328, 60.3463134765625, 112.65912628173828, -9.967864990234375, -23.6435546875, -92.40031433105469, 50.59672546386719, 155.43429565429688, 37.378929138183594, -58.148521423339844, 29.307647705078125, 98.56201934814453, -113.47337341308594, 134.144287109375, 39.52268981933594, 57.734039306640625, -143.9091033935547, 27.591888427734375, 184.76708984375, 96.14263916015625, 45.283935546875, 112.403076171875, 40.651771545410156, -18.756317138671875, 106.04987335205078, 138.15911865234375, 128.60943603515625, 0.0, 125.66586303710938, -139.67755126953125, 138.04052734375, 11.37890625, 82.40996551513672, 26.33758544921875, 207.9352569580078, 249.1396484375, 136.52935791015625, 165.9346923828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000326.npy"}
{"epoch": 0.6827225130890052, "step": 327, "batch_size": 128, "mean": 73.71954345703125, "std": 99.63726043701172, "min": -168.8538818359375, "p10": -37.60794486999511, "median": 57.136810302734375, "p90": 204.76443634033203, "max": 428.816162109375, "pos_frac": 0.796875, "sample": [26.760398864746094, 152.04205322265625, 55.696781158447266, 10.369636535644531, -7.663324356079102, 75.62744140625, 69.16984558105469, 183.09811401367188, -54.1243896484375, 11.545623779296875, 215.92401123046875, 240.42221069335938, 185.87612915039062, 23.947357177734375, 149.98068237304688, -16.651336669921875, 224.1109619140625, 195.36248779296875, 244.60678100585938, 41.814666748046875, 146.77035522460938, 91.90963745117188, 244.19505310058594, 52.27024841308594, -34.24432373046875, 42.2919921875, 45.0819091796875, 119.74249267578125, 194.14434814453125, 15.419036865234375, -100.51821899414062, 166.3175048828125, 216.96063232421875, -98.41921997070312, 134.40789794921875, 39.30517578125, 139.22161865234375, 32.50828552246094, 321.0033264160156, 33.707733154296875, 95.9367904663086, 109.27827453613281, 208.3382110595703, 65.25910949707031, -14.412460327148438, -136.0003662109375, 56.79193878173828, 3.336273193359375, 69.87445068359375, -52.50689697265625, 65.10708618164062, -9.326629638671875, 98.51116943359375, 36.337066650390625, -127.62416076660156, -53.160621643066406, 58.93695068359375, 188.0672607421875, 6.968414306640625, 0.0, -79.12312316894531, 203.23281860351562, 94.24604797363281, -17.412506103515625, 44.8212890625, 147.5085906982422, 57.48168182373047, 191.341064453125, -10.0255126953125, -45.45639419555664, 37.38592529296875, 181.6345977783203, 44.80096435546875, -54.61222839355469, 48.05659484863281, 173.829345703125, 219.55618286132812, 106.67378234863281, 158.658203125, 127.51300048828125, -5.530181884765625, -21.815902709960938, 13.858184814453125, 19.026824951171875, 28.82122802734375, 117.88067626953125, 70.24998474121094, 3.8310928344726562, 109.9232177734375, 28.528656005859375, 24.902496337890625, 428.816162109375, 300.7024841308594, 85.69686889648438, 42.30482482910156, -28.9910888671875, 3.5028076171875, -160.39190673828125, 50.01142120361328, 76.50177001953125, 58.17041778564453, 15.889984130859375, 136.83154296875, 56.27183532714844, 38.69525146484375, 66.20581817626953, 68.56771850585938, 162.21060180664062, -9.475181579589844, 158.381103515625, 140.36044311523438, 128.09173583984375, 91.44123840332031, 121.09405517578125, 245.2344970703125, 47.255096435546875, 69.78033447265625, 112.645751953125, -168.8538818359375, 103.7974853515625, 253.763671875, 46.994171142578125, -3.0222320556640625, 154.2846221923828, -100.4100341796875, 39.603363037109375, 39.513336181640625, 45.135589599609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000327.npy"}
{"epoch": 0.6848167539267016, "step": 328, "batch_size": 128, "mean": 66.97782897949219, "std": 89.61927032470703, "min": -162.210205078125, "p10": -34.040850830078114, "median": 59.54368591308594, "p90": 184.58373107910157, "max": 333.5939025878906, "pos_frac": 0.7734375, "sample": [10.654083251953125, 53.69164276123047, 139.98419189453125, 122.1190185546875, 29.367645263671875, 155.49453735351562, 280.36663818359375, 110.34488677978516, 136.1356201171875, 8.61151123046875, 55.53704833984375, 120.163818359375, -31.211639404296875, 116.48916625976562, 17.6959228515625, 73.84918212890625, 140.11672973632812, 255.36767578125, 196.66949462890625, -162.210205078125, 30.82000732421875, 164.24371337890625, -109.2468490600586, 78.17324829101562, -55.10260009765625, 2.2391204833984375, 122.16390991210938, 54.330291748046875, 140.1223907470703, 15.14483642578125, 73.29510498046875, -57.98686218261719, 183.8802490234375, 51.8179931640625, 5.45587158203125, 173.89073181152344, 59.900115966796875, 66.8829345703125, 333.5939025878906, 3.4786529541015625, 70.55451965332031, 38.35940933227539, -3.68896484375, -15.935516357421875, -39.5933837890625, 241.19696044921875, 25.96820068359375, 71.2374267578125, 87.17744445800781, 2.8321685791015625, -1.7655181884765625, 47.649169921875, 76.39677429199219, 77.45724487304688, 196.63194274902344, 2.684661865234375, -73.792724609375, 24.797393798828125, 97.44950103759766, 0.3421478271484375, 19.1959228515625, 144.23721313476562, -11.900897979736328, -60.034088134765625, 156.8231964111328, 48.76032638549805, 20.08148193359375, 3.85968017578125, -3.45831298828125, 154.2808074951172, 118.360595703125, -29.234527587890625, 151.37106323242188, 107.14312744140625, 3.4327239990234375, 84.88775634765625, 14.8912353515625, 21.595001220703125, 48.318260192871094, -45.5975341796875, 173.36093139648438, 151.26678466796875, 125.08761596679688, 232.89889526367188, -68.85436248779297, 294.63446044921875, 214.5428466796875, -62.0675048828125, 142.38023376464844, -98.15298461914062, -25.555938720703125, 151.07382202148438, -5.08038330078125, 79.6906967163086, 210.89605712890625, -7.488983154296875, 65.23757934570312, 97.7991943359375, 78.24859619140625, -9.317584991455078, 4.8578033447265625, -52.61077880859375, -25.70245361328125, 78.17752075195312, 116.897705078125, 124.65206146240234, 9.71771240234375, 186.22518920898438, -7.7718048095703125, 17.16936492919922, 124.04171752929688, 151.8446044921875, 228.04409790039062, -4.6485595703125, -31.66119384765625, 81.68603515625, 7.022552490234375, -26.72149658203125, 101.5648193359375, 59.187255859375, 38.9449348449707, 29.479324340820312, 78.74871826171875, -48.69708251953125, 68.176025390625, 207.4364013671875, 176.89157104492188, 100.30492401123047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000328.npy"}
{"epoch": 0.6869109947643979, "step": 329, "batch_size": 128, "mean": 68.18951416015625, "std": 115.4984130859375, "min": -205.068359375, "p10": -57.095755004882804, "median": 52.486793518066406, "p90": 232.60065917968748, "max": 400.48785400390625, "pos_frac": 0.71875, "sample": [-81.4490966796875, 38.55857849121094, -89.45535278320312, 162.8582763671875, -45.1475830078125, 162.44375610351562, -92.14288330078125, 54.251251220703125, -1.89532470703125, -70.8214111328125, 132.55809020996094, 40.9935302734375, -28.452178955078125, -205.068359375, -38.204864501953125, 272.7828063964844, 17.79021453857422, -10.363018035888672, 19.947860717773438, 72.82958984375, 287.8536071777344, 239.96234130859375, -3.1752853393554688, 41.328643798828125, 162.72271728515625, 84.81451416015625, -169.52096557617188, 125.74553680419922, 144.83323669433594, -54.16827392578125, 238.40625, -72.15574645996094, 20.009048461914062, 60.96900939941406, 40.53578186035156, 77.4609375, 155.90301513671875, 51.056640625, 55.933441162109375, 106.45828247070312, -74.693359375, 306.1212463378906, 103.5469970703125, 61.716522216796875, -97.08074951171875, 43.439117431640625, 244.666259765625, 75.0479736328125, 203.15679931640625, 145.97938537597656, 25.54364013671875, -15.624542236328125, 14.940475463867188, 103.58201599121094, 54.55015563964844, 15.86785888671875, -2.94720458984375, 293.259765625, 194.80194091796875, 2.31365966796875, 65.87808227539062, -47.46759033203125, 18.734512329101562, -1.30517578125, -18.56525421142578, 123.79055786132812, 113.09375, 24.393531799316406, 209.060546875, 387.8157958984375, 101.428955078125, -46.5430908203125, 207.27627563476562, 143.00753784179688, -79.27658081054688, 69.6148681640625, 73.33290100097656, 66.20518493652344, 57.41301727294922, -63.926544189453125, 85.64306640625, 362.668212890625, 15.712432861328125, 91.09603881835938, -15.525466918945312, -51.822364807128906, 299.70855712890625, 111.78799438476562, 142.30435180664062, 84.31713104248047, 230.112548828125, -10.10150146484375, -18.493370056152344, 23.141281127929688, 1.9762077331542969, 44.37391662597656, -151.6436767578125, -28.98297119140625, 14.845855712890625, 186.475341796875, 98.34317016601562, 18.7349853515625, -21.094100952148438, 141.90829467773438, 249.69183349609375, 54.0570068359375, 53.91694641113281, 13.086944580078125, 48.93282699584961, 117.90280151367188, 46.04364013671875, -44.7001953125, 167.1917724609375, -41.05029296875, 3.938913345336914, 137.09719848632812, 177.5504913330078, 400.4808349609375, -6.0166168212890625, 92.3511962890625, 5.7686920166015625, 400.48785400390625, 156.4072265625, 2.701141357421875, 140.12286376953125, 43.8277587890625, -119.2745361328125, -36.88048553466797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000329.npy"}
{"epoch": 0.6890052356020943, "step": 330, "batch_size": 128, "mean": 63.966224670410156, "std": 92.34307098388672, "min": -205.56491088867188, "p10": -36.84879074096679, "median": 58.50965881347656, "p90": 176.67508850097656, "max": 331.95709228515625, "pos_frac": 0.765625, "sample": [-12.3858642578125, 128.55166625976562, 30.253616333007812, -205.56491088867188, 163.0711669921875, 142.65487670898438, 37.52936553955078, 31.297882080078125, 326.51806640625, 89.86288452148438, 136.48602294921875, 37.922576904296875, 41.86676788330078, 163.2396240234375, 4.684638977050781, 88.35931396484375, -104.8177490234375, 39.9688720703125, 94.12046813964844, -10.72137451171875, 82.33377075195312, 24.031694412231445, 3.9712142944335938, 157.32049560546875, 59.34661865234375, 178.79147338867188, 44.849639892578125, 9.7808837890625, 92.11048126220703, -38.7305908203125, 26.15545654296875, 217.10272216796875, 121.16650390625, 103.95327758789062, -26.6334228515625, 316.8826599121094, -7.4560546875, -13.2657470703125, -15.676300048828125, 157.65316772460938, 139.53359985351562, -42.82405090332031, -60.14002990722656, -9.76177978515625, 137.61642456054688, -134.35263061523438, 140.97714233398438, 40.93107604980469, -0.3311309814453125, 0.0863800048828125, 94.94146728515625, -183.251708984375, 175.76806640625, 61.38623046875, -105.61019897460938, 52.291526794433594, 13.917572021484375, 105.36497497558594, 73.0184326171875, 248.85812377929688, 78.387451171875, 18.612709045410156, -7.59619140625, 253.48403930664062, 156.12005615234375, 81.30764770507812, 10.88775634765625, 62.105010986328125, 146.3515625, 182.0657958984375, 99.8687744140625, 88.1573486328125, -53.20393371582031, 159.3406982421875, 37.51947021484375, 51.717689514160156, 74.1524658203125, 67.09408569335938, 157.82540893554688, 38.97926330566406, 210.11514282226562, 57.672698974609375, 3.500579833984375, 102.60076904296875, 65.64669799804688, 222.85147094726562, 71.9923095703125, 72.36289978027344, 223.8642578125, 42.40027618408203, 125.34701538085938, 91.49688720703125, -14.542205810546875, 331.95709228515625, -35.42900085449219, -80.35430908203125, -0.426422119140625, -41.39630126953125, 69.54681396484375, 34.43922424316406, 29.174972534179688, 87.330078125, 12.509969711303711, 164.56930541992188, 108.6534652709961, -15.59912109375, 82.41751098632812, 78.90960693359375, 60.414947509765625, 183.9420623779297, 3.2158355712890625, 119.15130615234375, -36.04230499267578, 193.6224365234375, -62.993133544921875, -35.23712158203125, 13.24489974975586, 45.16276550292969, 116.45916748046875, -13.68564224243164, 32.01277160644531, 106.82440185546875, 33.1519775390625, -5.529083251953125, -51.780029296875, 132.259521484375, 37.967041015625, 45.72064208984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000330.npy"}
{"epoch": 0.6910994764397905, "step": 331, "batch_size": 128, "mean": 65.423828125, "std": 112.24449157714844, "min": -253.88079833984375, "p10": -53.101254272460935, "median": 48.72346496582031, "p90": 223.6051513671875, "max": 470.53704833984375, "pos_frac": 0.7578125, "sample": [25.09784698486328, 226.2255859375, 126.46147155761719, -179.9649200439453, -43.642364501953125, 61.804840087890625, 59.96296691894531, 196.26547241210938, 35.21746826171875, 31.295473098754883, -91.8876953125, -107.06301879882812, 0.24462890625, 0.0, 52.754150390625, -0.6781082153320312, 238.60821533203125, 37.57551193237305, 237.17288208007812, 89.38117980957031, 139.56088256835938, 0.27532196044921875, -3.48968505859375, 122.45854187011719, 20.5057373046875, 57.306854248046875, 209.00921630859375, 43.14031982421875, 126.79998779296875, 126.38995361328125, 33.74544143676758, 91.30171203613281, 224.037353515625, 72.47242736816406, 26.81182861328125, 132.6724853515625, 143.58489990234375, -9.65179443359375, 192.2462158203125, 232.03306579589844, 33.40076446533203, 60.12860107421875, -129.8179931640625, -16.374290466308594, 65.18612670898438, -41.630706787109375, 12.6282958984375, 117.92393493652344, 1.21826171875, -19.120506286621094, 11.738616943359375, -10.021148681640625, 83.820556640625, -45.083251953125, 178.6211395263672, 134.27523803710938, -188.71661376953125, 181.53643798828125, 46.296722412109375, 257.3212890625, -13.18280029296875, 223.419921875, 31.81973648071289, 142.39920043945312, 90.61532592773438, 40.432464599609375, 60.217803955078125, -0.4427490234375, -55.129669189453125, 121.01101684570312, 86.39840698242188, -133.39706420898438, 80.20134735107422, 87.64615631103516, 93.78514099121094, 165.40740966796875, 3.865509033203125, 152.1388702392578, 51.59051513671875, 69.52496337890625, 323.48651123046875, 86.28155517578125, 148.112060546875, 2.711305618286133, -166.18714904785156, 41.14082336425781, 341.3274841308594, 13.240434646606445, -78.89039611816406, 266.4234619140625, 221.24807739257812, 130.1895751953125, -253.88079833984375, 30.35211181640625, 239.55511474609375, 62.68896484375, 106.30361938476562, -9.45556640625, 470.53704833984375, -57.25335693359375, 76.745849609375, -0.552642822265625, 249.34210205078125, 103.04544067382812, 50.605377197265625, 132.47222900390625, 25.9398193359375, 83.5447998046875, 69.1405029296875, 46.841552734375, -85.71029663085938, -36.569366455078125, -105.98678588867188, 31.948211669921875, 24.908065795898438, -52.23193359375, -48.82160949707031, 40.34806442260742, 36.71903991699219, 20.371749877929688, 155.62648010253906, 334.362548828125, 39.81231689453125, 191.79367065429688, 32.8822021484375, -21.873046875, 43.472633361816406, 10.448028564453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000331.npy"}
{"epoch": 0.6931937172774869, "step": 332, "batch_size": 128, "mean": 67.59410858154297, "std": 112.36414337158203, "min": -254.4688720703125, "p10": -64.82407684326172, "median": 53.07054138183594, "p90": 205.47550506591796, "max": 406.5303039550781, "pos_frac": 0.71875, "sample": [-26.500749588012695, 128.4970703125, -3.0034027099609375, 383.941650390625, -64.560546875, 8.06719970703125, 141.06085205078125, 122.84078979492188, -81.96257019042969, 1.767303466796875, 155.4912109375, 33.82904052734375, 2.9785919189453125, 37.51246643066406, 266.7166748046875, 406.5303039550781, -35.93919372558594, 41.173614501953125, 81.71973419189453, -12.1861572265625, 64.24098205566406, -67.95413970947266, -21.103057861328125, 32.48371887207031, -44.06474304199219, -40.39082336425781, 42.72981262207031, -25.585662841796875, 163.0891571044922, 302.5723876953125, 118.45077514648438, 70.10760498046875, 26.97735595703125, 108.1900634765625, 0.95458984375, 221.89093017578125, 161.2906494140625, 213.38119506835938, 121.51614379882812, 175.8988800048828, 116.34588623046875, 43.60517883300781, 246.88436889648438, 12.58755111694336, -91.67167663574219, -125.37457275390625, 174.45489501953125, 138.18023681640625, -3.058065414428711, -78.35739135742188, 11.861724853515625, 191.84075927734375, 252.47265625, 202.81475830078125, -254.4688720703125, -67.64724731445312, 161.675537109375, -147.64576721191406, 88.84176635742188, 5.70306396484375, -34.639312744140625, 125.14105224609375, -24.90814781188965, 165.84832763671875, 134.3380126953125, 272.6497497558594, 94.88510131835938, 72.81919860839844, 117.53021240234375, 202.84817504882812, 165.33837890625, -9.919975280761719, 62.006385803222656, 39.551422119140625, -9.103546142578125, 36.845611572265625, -81.69198608398438, 72.71170806884766, 244.95559692382812, -194.59912109375, -40.225677490234375, -6.5357513427734375, 14.7100830078125, 320.4595947265625, 109.0091552734375, 198.5338134765625, 44.144615173339844, 24.80609130859375, 9.632080078125, 142.24949645996094, 111.220703125, 83.98699188232422, 51.10780334472656, 312.87042236328125, -18.637741088867188, -41.33058166503906, 13.163177490234375, 96.85834503173828, 55.03327941894531, 68.1431884765625, 130.56637573242188, 89.42767333984375, 153.23068237304688, 197.7972869873047, 14.524127960205078, 45.53050994873047, 44.07635498046875, 92.4576416015625, 74.2803955078125, 134.81707763671875, 211.60594177246094, 4.739604949951172, 101.90426635742188, 153.6591796875, -69.3940658569336, 32.871734619140625, 35.28172302246094, -17.815200805664062, 120.17633056640625, 61.415802001953125, -65.43898010253906, 73.27423095703125, 172.70086669921875, -34.3782958984375, 0.0, -12.943115234375, -149.07594299316406, -36.7476806640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000332.npy"}
{"epoch": 0.6952879581151833, "step": 333, "batch_size": 128, "mean": 56.30854034423828, "std": 90.72771453857422, "min": -150.9722137451172, "p10": -47.23264007568359, "median": 43.13587951660156, "p90": 182.3955871582031, "max": 266.5382995605469, "pos_frac": 0.75, "sample": [168.91159057617188, -143.60733032226562, 6.562297821044922, 261.55780029296875, 10.018035888671875, 155.58673095703125, 45.62178039550781, 14.824310302734375, 76.27790069580078, 9.217559814453125, 155.87710571289062, 54.02197265625, 53.12408447265625, -63.764625549316406, -68.31353759765625, 103.46163940429688, 59.14971923828125, -33.913818359375, 78.08995056152344, 180.42462158203125, 45.478485107421875, 35.90025329589844, -44.06519317626953, 114.0447006225586, 57.32073974609375, 5.0538177490234375, 22.039093017578125, 174.5286865234375, 216.31564331054688, -51.168701171875, 56.8394775390625, -15.826904296875, 158.752685546875, -131.76162719726562, 80.560546875, 38.506500244140625, 170.76348876953125, -35.36632537841797, 190.66244506835938, 8.121734619140625, -105.17866516113281, -22.858238220214844, -41.900634765625, 141.10345458984375, 0.0, 5.16876220703125, 33.286773681640625, 155.75189208984375, 26.62427520751953, 70.48468017578125, -150.9722137451172, 255.48098754882812, 51.531402587890625, 120.15290832519531, -48.56170654296875, -46.66304016113281, -70.29238891601562, 237.46337890625, 45.85186767578125, -17.82080078125, 27.987442016601562, 8.883262634277344, -39.83502197265625, 65.80230712890625, 8.955841064453125, 150.8074951171875, -53.80519104003906, 26.4522705078125, 79.771728515625, 25.914749145507812, 52.805145263671875, 24.56951904296875, 137.38327026367188, 75.16423797607422, 99.63803100585938, 54.36163330078125, 56.6871337890625, 216.7335205078125, 32.780120849609375, 127.89453125, 157.10006713867188, -119.214599609375, 14.432479858398438, 175.7991943359375, 242.6043701171875, 122.57967376708984, 23.19195556640625, 211.11093139648438, -26.984161376953125, 117.032470703125, -21.623611450195312, 186.9945068359375, 40.79327392578125, -37.27978515625, -33.178131103515625, 29.63311004638672, -75.93914794921875, 177.154052734375, 212.157470703125, 63.453125, 94.39190673828125, 80.05575561523438, 2.6740474700927734, 39.482574462890625, 229.45379638671875, 62.655120849609375, 26.88873291015625, 90.37432861328125, 15.905029296875, 149.01071166992188, -52.30189514160156, 266.5382995605469, -24.482940673828125, 1.309326171875, 8.435272216796875, 48.968353271484375, 121.951904296875, 138.45062255859375, 33.32536315917969, -12.75948715209961, -13.376510620117188, 126.60401916503906, 17.27117919921875, -27.541351318359375, -5.107147216796875, 192.33485412597656, 72.7811279296875, 26.95082664489746], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000333.npy"}
{"epoch": 0.6973821989528796, "step": 334, "batch_size": 128, "mean": 68.69866943359375, "std": 99.20670318603516, "min": -203.73452758789062, "p10": -40.957228851318355, "median": 65.17107009887695, "p90": 195.52784423828123, "max": 308.92071533203125, "pos_frac": 0.765625, "sample": [209.51614379882812, 120.77505493164062, 70.65936279296875, 0.392425537109375, 3.0899620056152344, 120.76113891601562, 144.9428253173828, 61.94183349609375, -28.497390747070312, 165.44752502441406, 56.062286376953125, -3.255859375, 128.80239868164062, -9.13543701171875, 121.37857055664062, -32.5731201171875, -181.9751739501953, -42.90778350830078, 185.59503173828125, -98.68377685546875, 28.493675231933594, 24.256484985351562, 245.57781982421875, 277.2606201171875, 159.39739990234375, 73.19369506835938, -0.708038330078125, 118.82550048828125, 175.62118530273438, -7.493534088134766, -2.8057861328125, -203.73452758789062, 0.0, -36.945831298828125, 19.7852783203125, -40.12127685546875, 18.542755126953125, 109.55392456054688, -107.14181518554688, 128.32846069335938, 126.1031723022461, 128.62603759765625, 234.94955444335938, 147.76904296875, 238.36489868164062, 11.03155517578125, 246.81698608398438, 118.34036254882812, 29.300750732421875, 63.924957275390625, 71.936767578125, 11.948699951171875, 182.60939025878906, 10.66827392578125, 25.923789978027344, -127.44415283203125, 128.55438232421875, 299.33367919921875, 14.697662353515625, 140.52529907226562, 55.17913818359375, 20.140625, -119.18480682373047, 128.13186645507812, 22.91817855834961, -15.181503295898438, 68.76486206054688, 92.41166687011719, -15.044059753417969, 21.83172607421875, 18.955535888671875, 6.1717529296875, -24.7401123046875, 169.127685546875, 59.58758544921875, 82.9774169921875, 0.4529132843017578, 96.21121215820312, 28.965118408203125, 150.34783935546875, -71.26446533203125, 159.46249389648438, 193.1937255859375, 146.4679412841797, 132.7877655029297, -69.30764770507812, -49.464263916015625, 178.4769287109375, 132.03555297851562, 4.084259033203125, 156.8751220703125, 150.5762939453125, 55.9210205078125, 178.94448852539062, 82.91513061523438, 117.70785522460938, 214.59515380859375, 182.02459716796875, 30.621978759765625, 66.41718292236328, 91.22059631347656, 78.6317138671875, 23.9427490234375, -69.31689453125, -31.630615234375, 57.11293029785156, 72.20196533203125, -26.17273712158203, 86.33584594726562, 28.217147827148438, 236.47381591796875, 93.18017578125, 85.03268432617188, 153.75296020507812, 12.329345703125, 6.993492126464844, 86.72181701660156, 200.97412109375, -132.86558532714844, 203.23989868164062, 308.92071533203125, -25.843795776367188, 12.826898574829102, 13.542648315429688, 98.21881103515625, -4.03009033203125, -45.85015869140625, 261.9697265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000334.npy"}
{"epoch": 0.6994764397905759, "step": 335, "batch_size": 128, "mean": 68.06571960449219, "std": 107.83258056640625, "min": -198.08444213867188, "p10": -59.48536834716797, "median": 65.76408386230469, "p90": 195.2024169921875, "max": 336.6726379394531, "pos_frac": 0.7265625, "sample": [-58.19305419921875, -198.08444213867188, 144.80210876464844, -50.6258544921875, -53.402740478515625, 162.64093017578125, 74.21990966796875, 302.427978515625, 126.10411071777344, -5.059104919433594, -44.401397705078125, -158.42674255371094, 95.87136840820312, 177.8380889892578, -22.046249389648438, 146.04534912109375, -2.807464599609375, 109.6964111328125, -2.6893386840820312, 233.38912963867188, 96.6822738647461, 47.43311309814453, 171.76535034179688, -59.14549255371094, -20.752471923828125, 138.98448181152344, -102.38687133789062, 112.58961486816406, 111.46940612792969, -166.85879516601562, -85.1098861694336, 333.94744873046875, 93.7557373046875, 9.919036865234375, 182.61068725585938, -0.023193359375, 13.144927978515625, 215.5797119140625, 186.99069213867188, 143.19790649414062, -39.528961181640625, 130.9091796875, 15.763427734375, 123.86201477050781, 35.23981475830078, 140.33450317382812, 152.72305297851562, 172.09165954589844, 137.108642578125, 121.92486572265625, 115.68043518066406, 29.134796142578125, 144.05108642578125, 62.825836181640625, 90.260986328125, 137.37945556640625, 328.7119140625, -50.490150451660156, -53.0322265625, 20.300827026367188, -7.978445053100586, -11.773414611816406, 19.776723861694336, -15.109649658203125, 125.29129028320312, 26.774627685546875, 94.05218505859375, -157.99862670898438, 336.6726379394531, 159.9530487060547, 120.74468994140625, 25.626312255859375, -4.698028564453125, -88.20114135742188, 88.70692443847656, 102.38909912109375, 257.53411865234375, 48.929443359375, 25.963897705078125, 38.00299072265625, 122.71846008300781, 108.3310546875, 206.45620727539062, 175.2178955078125, 215.18890380859375, 120.53741455078125, 0.0, 194.8511199951172, 196.02210998535156, 20.830223083496094, -79.81747436523438, 23.088668823242188, 106.39280700683594, 83.42543029785156, 42.71563720703125, -3.4587478637695312, 160.49237060546875, 13.971128463745117, 6.292724609375, 38.1925048828125, 190.825927734375, 213.57366943359375, 25.67139434814453, 174.9923858642578, -100.22805786132812, 298.8267822265625, -99.3353271484375, 193.59500122070312, 11.544158935546875, -60.278411865234375, 8.887786865234375, 79.75894165039062, 20.084640502929688, -164.05291748046875, 60.312232971191406, 34.25923156738281, 198.2344970703125, 141.57003784179688, -40.26251220703125, 68.70233154296875, 124.61125183105469, 26.475845336914062, -66.98575592041016, 18.53802490234375, -32.914329528808594, 46.78643798828125, 98.9434814453125, 87.82411193847656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000335.npy"}
{"epoch": 0.7015706806282722, "step": 336, "batch_size": 128, "mean": 64.64444732666016, "std": 113.56724548339844, "min": -244.79786682128906, "p10": -47.46378402709961, "median": 48.45198440551758, "p90": 203.84463653564453, "max": 582.6709594726562, "pos_frac": 0.71875, "sample": [259.84368896484375, 68.58645629882812, 121.44064331054688, -2.04034423828125, 15.011970520019531, 272.47467041015625, 111.6717529296875, 244.56210327148438, 51.42448425292969, 184.0244598388672, 44.037025451660156, 299.98272705078125, -121.3843994140625, 35.6863899230957, 74.95183563232422, 75.08660888671875, -50.241546630859375, 109.25119018554688, 75.07843017578125, -72.23165893554688, 201.9633331298828, -25.352333068847656, 52.551544189453125, 170.26217651367188, 9.718595504760742, 63.4639892578125, 82.23492431640625, -30.51318359375, -2.0622634887695312, 50.755592346191406, 92.62034606933594, -27.464046478271484, 7.800933837890625, 88.954345703125, -8.052825927734375, 150.90298461914062, 92.43167114257812, 247.76788330078125, 58.52350616455078, -6.7794189453125, 305.7430114746094, 4.817176818847656, 39.782752990722656, 152.3140411376953, 36.376953125, 154.1158447265625, 160.50796508789062, -77.94387817382812, 59.91082763671875, 130.793701171875, 54.918060302734375, 9.897541046142578, 64.19942474365234, 200.0950469970703, -10.63995361328125, -71.19882202148438, 208.23434448242188, 68.19337463378906, 52.5111083984375, 237.19943237304688, 191.93408203125, -33.073486328125, -114.66729736328125, 0.0, 160.07369995117188, 117.80372619628906, -15.3726806640625, 81.78596496582031, -244.79786682128906, 154.12049865722656, -81.46795654296875, 145.46572875976562, -116.67874145507812, 8.902801513671875, 23.415863037109375, 22.863006591796875, -20.3101806640625, 81.87547302246094, 53.10710906982422, -48.58753204345703, 148.01779174804688, 57.78985595703125, -4.667213439941406, -16.59771728515625, -31.66485595703125, 582.6709594726562, 44.362701416015625, 134.8631591796875, 15.347885131835938, 122.15206909179688, -37.55717468261719, -53.26609802246094, -8.17510986328125, 29.63134765625, 28.46826171875, 115.90406036376953, 123.47207641601562, 16.901165008544922, 9.003704071044922, 65.28515625, 37.85943603515625, -32.59230041503906, -21.38447380065918, 261.9512939453125, 6.997955322265625, 97.17337036132812, 30.1253662109375, 88.28439331054688, 41.821136474609375, 32.16851806640625, -146.51251220703125, 154.05029296875, -13.694610595703125, -1.592681884765625, 12.321372985839844, 8.45147705078125, 201.13259887695312, -153.48223876953125, 30.218109130859375, 62.21001434326172, 143.36376953125, 252.65423583984375, 426.93121337890625, 241.3904266357422, -39.912628173828125, 46.14837646484375, -46.982177734375, 0.28310394287109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000336.npy"}
{"epoch": 0.7036649214659686, "step": 337, "batch_size": 128, "mean": 74.41445922851562, "std": 100.18466186523438, "min": -123.85232543945312, "p10": -36.321733474731445, "median": 58.323455810546875, "p90": 204.82361907958983, "max": 359.1015625, "pos_frac": 0.7734375, "sample": [85.981201171875, 61.97572326660156, 147.05032348632812, 126.2558822631836, 53.64178466796875, 248.3408203125, 166.42767333984375, -9.688919067382812, 6.87945556640625, 48.96138000488281, -67.44012451171875, 61.466064453125, -11.701171875, 86.44757080078125, 24.332427978515625, 176.5055389404297, 151.1287384033203, -32.95503234863281, 174.59390258789062, 52.84068298339844, 13.726470947265625, -10.9481201171875, 38.36268997192383, 119.24676513671875, 8.652414321899414, 184.6910400390625, 120.86320495605469, 70.2337646484375, 335.56402587890625, 182.73284912109375, -73.44259643554688, 173.91595458984375, 30.41158676147461, -30.086669921875, -6.575721740722656, 218.1370849609375, 87.04055786132812, 19.802528381347656, 59.628753662109375, 100.21336364746094, -91.56613159179688, 67.38423919677734, 35.69697570800781, -87.44438171386719, 88.48873901367188, 197.9609375, 108.68353271484375, -40.10791778564453, -67.80567932128906, -23.529327392578125, 222.33990478515625, 110.4169921875, 118.95404052734375, -58.67889404296875, -16.73792266845703, 200.84140014648438, 168.89407348632812, 168.06216430664062, 4.822813034057617, 81.99838256835938, -98.17279052734375, 17.97266387939453, 26.93035888671875, 99.49727630615234, 54.450164794921875, 315.94378662109375, 24.803314208984375, 42.59990692138672, -35.99905014038086, -114.10948181152344, 28.415573120117188, 123.07659912109375, 145.44493103027344, -15.378265380859375, -10.74151611328125, 111.56322479248047, 52.09979248046875, 266.9659423828125, 35.84223175048828, 40.477874755859375, 54.68841552734375, 29.88531494140625, 57.766204833984375, 7.8857421875, -113.49624633789062, -40.53605270385742, 355.457275390625, 52.40852355957031, 1.2509765625, 9.995965957641602, 67.76498413085938, 206.71385192871094, 99.09722900390625, 182.93870544433594, -14.196807861328125, 124.97711181640625, 34.04901123046875, 103.15460205078125, 359.1015625, 109.31396484375, 85.0184326171875, 260.7314758300781, 26.826759338378906, -37.07466125488281, 22.32898712158203, 204.01351928710938, 90.98455810546875, 113.68905639648438, -30.876495361328125, 95.42839050292969, 90.99671936035156, 255.4145965576172, -32.36822509765625, -123.85232543945312, 196.68109130859375, 50.841796875, 16.7018985748291, 15.480262756347656, -33.640254974365234, 0.3864288330078125, 58.880706787109375, 177.45762634277344, -20.537425994873047, 208.82424926757812, 118.25399780273438, 148.39532470703125, 227.34349060058594, 156.92999267578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000337.npy"}
{"epoch": 0.7057591623036649, "step": 338, "batch_size": 128, "mean": 67.62041473388672, "std": 102.02483367919922, "min": -228.32235717773438, "p10": -50.31690979003906, "median": 52.65875244140625, "p90": 212.36577453613282, "max": 352.3829650878906, "pos_frac": 0.7421875, "sample": [244.8490753173828, -130.7589874267578, 13.828125, -1.79541015625, 24.20672607421875, 94.30101013183594, 30.386566162109375, 156.77394104003906, -6.205413818359375, 72.90606689453125, -84.48970031738281, -105.94265747070312, 186.6591796875, 109.56097412109375, 128.2054443359375, -57.013092041015625, 91.97039794921875, 0.5678253173828125, -37.581905364990234, -3.386810302734375, 93.6953125, 170.5225830078125, -94.04757690429688, 30.302078247070312, 72.61456298828125, 1.9892311096191406, 87.05647277832031, 20.289833068847656, 8.278398513793945, 82.2935791015625, -8.572998046875, 192.42977905273438, 292.1668701171875, 352.3829650878906, 30.271041870117188, 6.920989990234375, 169.34283447265625, 132.55563354492188, 121.20286560058594, 32.63238525390625, 124.71279907226562, 81.51553344726562, -2.6030521392822266, 122.57952880859375, 250.4569091796875, -73.85940551757812, 49.6102294921875, 10.876914978027344, -77.17568969726562, -8.549335479736328, 47.33641815185547, 134.66455078125, -11.316497802734375, -34.50947570800781, -18.81328582763672, 116.32723999023438, 47.22889709472656, -49.861572265625, 127.0500259399414, 7.492279052734375, 141.23309326171875, 48.336944580078125, 28.832977294921875, 25.622314453125, -18.421592712402344, -113.05877685546875, 104.16848754882812, 211.66165161132812, -1.8299560546875, 49.998291015625, 91.42987060546875, 102.85708618164062, 38.804595947265625, 114.93455505371094, 204.20046997070312, 118.0292739868164, -17.805816650390625, 101.86993408203125, -87.94219970703125, 248.77169799804688, 231.81784057617188, 11.27264404296875, -16.106353759765625, 12.653522491455078, -63.82390594482422, 295.9418640136719, 176.24911499023438, 64.70811462402344, -51.379364013671875, 134.0694580078125, 40.580963134765625, 247.32138061523438, -228.32235717773438, -39.38273620605469, 31.25927734375, 9.634613037109375, 45.422607421875, 104.35552978515625, 340.5010681152344, 95.96833801269531, 166.728515625, 5.112846374511719, -36.543304443359375, 123.96224975585938, -22.91766357421875, 12.704734802246094, 64.20758056640625, 34.529052734375, 232.74838256835938, 231.13311767578125, 167.88555908203125, 58.36328125, 9.49154281616211, 166.83657836914062, 128.66014099121094, 60.545921325683594, -37.48637390136719, 172.04669189453125, 55.3192138671875, 59.148223876953125, 82.38177490234375, 63.18775939941406, -41.37608337402344, 148.2579345703125, 214.00872802734375, 171.71917724609375, -51.9903564453125, 217.7822265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000338.npy"}
{"epoch": 0.7078534031413612, "step": 339, "batch_size": 128, "mean": 73.50955963134766, "std": 106.82032775878906, "min": -186.50152587890625, "p10": -62.992227172851564, "median": 70.79028701782227, "p90": 212.7740234375, "max": 300.9530944824219, "pos_frac": 0.7265625, "sample": [49.15580749511719, -92.35873413085938, 176.93690490722656, 156.15426635742188, 127.48580932617188, 187.89035034179688, -71.38824462890625, -31.503517150878906, 180.52699279785156, 60.316619873046875, 70.3331298828125, -52.735504150390625, 71.24744415283203, 184.90704345703125, 25.948883056640625, -24.11376953125, 1.3925971984863281, 172.46783447265625, -74.75685119628906, 115.60861206054688, 66.14047241210938, 53.274627685546875, 111.2103271484375, 135.43344116210938, -17.7305908203125, 139.8893280029297, 167.16439819335938, -37.84552001953125, -157.00848388671875, 8.61065673828125, 108.04925537109375, 13.77264404296875, 47.02787780761719, -33.37101745605469, 64.81842041015625, -48.47515869140625, 87.690185546875, 184.29531860351562, 46.663360595703125, -42.83043670654297, 186.6218719482422, -13.441192626953125, 105.41407775878906, 133.34690856933594, 233.85763549804688, 120.22828674316406, 123.92111206054688, 189.44558715820312, 227.4652099609375, 54.4608154296875, 44.66648864746094, -5.036773681640625, 203.19793701171875, -105.12855529785156, 60.1810302734375, 119.28964233398438, 300.9530944824219, 243.395263671875, 126.123779296875, 0.0, 106.74713897705078, -72.90145874023438, 263.90087890625, -156.53762817382812, 81.08499145507812, -63.141571044921875, 289.6643981933594, 65.6719970703125, 130.44503784179688, 135.44149780273438, -31.6263427734375, 132.07278442382812, 0.0, 233.3238525390625, -6.3974151611328125, 124.42137145996094, 66.89383697509766, 118.71320343017578, 80.31952667236328, 118.58416748046875, -62.36365509033203, 213.97573852539062, 11.902923583984375, 129.7994384765625, 44.238067626953125, 137.44146728515625, 84.15998077392578, 45.3065185546875, 108.42437744140625, 96.07098388671875, 46.29278564453125, 148.5419921875, -186.50152587890625, 258.66546630859375, 116.51687622070312, 90.11422729492188, 6.79266357421875, 50.92570495605469, 212.25900268554688, 11.523223876953125, 109.91156005859375, 65.57606506347656, 62.122802734375, -47.835052490234375, 0.0, 117.9122314453125, 117.52361297607422, -129.58975219726562, 202.95379638671875, -2.1118927001953125, -1.712310791015625, -71.77688598632812, 177.499755859375, 162.7744140625, -62.92822265625, 16.53668212890625, -15.562591552734375, -163.00808715820312, 229.1938018798828, 243.074462890625, 43.761749267578125, 187.51083374023438, 219.8592529296875, -29.879608154296875, 188.3594207763672, 12.489410400390625, -176.45437622070312, 292.99847412109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000339.npy"}
{"epoch": 0.7099476439790576, "step": 340, "batch_size": 128, "mean": 63.135711669921875, "std": 119.01041412353516, "min": -268.221923828125, "p10": -64.63455429077149, "median": 51.865478515625, "p90": 227.72059631347656, "max": 378.5054931640625, "pos_frac": 0.671875, "sample": [378.5054931640625, 31.9246826171875, -226.36331176757812, 156.86767578125, 59.569610595703125, 272.2069091796875, 42.781005859375, -42.7779541015625, 14.1962890625, -268.221923828125, 100.42411804199219, 185.09043884277344, 4.658599853515625, -25.08270263671875, 142.37652587890625, 338.69952392578125, 248.97509765625, 22.94708251953125, 156.85003662109375, 37.181434631347656, 20.909942626953125, 0.7137451171875, 77.60626220703125, 122.34957885742188, 206.20086669921875, 275.44110107421875, -172.83819580078125, -64.53839874267578, -26.349552154541016, 52.608970642089844, 242.61798095703125, 139.8397216796875, 13.155471801757812, 251.59231567382812, -36.940277099609375, -43.684288024902344, -1.4334716796875, -62.607208251953125, 192.36135864257812, 105.1861801147461, 141.17343139648438, 165.65956115722656, -71.90486145019531, 71.43228149414062, -37.91571044921875, -132.57220458984375, 7.919704437255859, -16.37127685546875, -15.43109130859375, 51.767333984375, -140.87588500976562, -0.6163005828857422, 178.2904052734375, 14.575279235839844, 209.51211547851562, 65.58697509765625, 12.277496337890625, 9.570220947265625, 131.50515747070312, -23.091384887695312, 33.11883544921875, 42.376869201660156, 25.95245361328125, 70.56198120117188, -151.20989990234375, 253.39999389648438, -112.22317504882812, 239.28872680664062, 164.423095703125, 167.50411987304688, -61.749908447265625, 122.65176391601562, -71.28764343261719, 38.03221130371094, -39.924072265625, 90.70365142822266, 227.72454833984375, 255.53305053710938, 336.6882019042969, 66.81880187988281, 109.01277160644531, -146.6457977294922, 227.71890258789062, 80.73410034179688, 139.85145568847656, -62.750518798828125, 47.66600036621094, -114.28627014160156, 209.37823486328125, 42.815521240234375, 95.53875732421875, -13.645393371582031, 71.40689086914062, 46.469757080078125, -2.15362548828125, -13.223615646362305, -63.884033203125, -9.105117797851562, 0.0, 197.0751190185547, -27.241958618164062, 156.9165496826172, -33.079559326171875, 73.10600280761719, 93.83030700683594, 75.0201416015625, 299.9736328125, -12.726615905761719, 105.4036865234375, -135.2044677734375, 167.91583251953125, 0.0, 111.89950561523438, -28.85546875, 134.336669921875, -11.614501953125, 194.21873474121094, 126.64797973632812, 168.5170440673828, 61.05424499511719, 122.57528686523438, 9.6322021484375, 67.92237091064453, 129.07061767578125, -64.85891723632812, 51.963623046875, 164.69190979003906, -3.592618942260742], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000340.npy"}
{"epoch": 0.7120418848167539, "step": 341, "batch_size": 128, "mean": 52.384986877441406, "std": 121.15853881835938, "min": -309.2510986328125, "p10": -100.37050476074218, "median": 55.04905700683594, "p90": 219.98871307373042, "max": 352.1365966796875, "pos_frac": 0.6796875, "sample": [14.503547668457031, 55.280303955078125, -7.232635498046875, 96.90955352783203, -81.97412109375, 103.99612426757812, -97.75491333007812, 68.69644165039062, -33.984375, 49.11138916015625, 69.22897338867188, 8.676691055297852, -0.055938720703125, 57.4463996887207, 106.66326904296875, 167.20169067382812, 121.238037109375, -127.1999282836914, -110.284423828125, 63.18254089355469, -66.3372573852539, -17.97030258178711, -34.877777099609375, 97.3536376953125, 117.23866271972656, -7.454742431640625, 185.33880615234375, -50.988311767578125, -110.53302001953125, 261.300537109375, -74.6571044921875, -83.09658813476562, 120.92236328125, 67.09480285644531, 67.17173767089844, 53.542724609375, -14.73516845703125, -60.780059814453125, -2.600688934326172, 29.37816619873047, 203.51931762695312, 105.81216430664062, 145.1824951171875, -309.2510986328125, 311.62591552734375, -17.68182373046875, 142.90310668945312, 84.19894409179688, 150.00161743164062, 10.72265625, 94.38198852539062, 216.6574249267578, -21.753875732421875, 130.174560546875, 55.7703857421875, 151.1551513671875, 271.8630065917969, 15.802947998046875, 48.81146240234375, 125.0364990234375, 163.2604522705078, 229.77117919921875, 114.12408447265625, 287.09649658203125, 143.1328887939453, -104.71792602539062, 349.38067626953125, 89.37620544433594, 115.5552978515625, 70.29280853271484, 103.29296875, 17.210235595703125, 307.7984619140625, 63.85935974121094, 44.85905456542969, 35.3114013671875, 69.04753112792969, 29.030731201171875, 33.505584716796875, -139.703857421875, 62.409088134765625, 25.605514526367188, 11.599983215332031, 79.43306732177734, -98.50732421875, 86.2833023071289, 54.81781005859375, -65.17106628417969, -51.087249755859375, 227.76171875, -75.77122497558594, 69.65908813476562, 34.12188720703125, 30.661102294921875, -11.384323120117188, -160.51043701171875, 165.06008911132812, 8.0657958984375, 272.51361083984375, 268.40234375, 106.44267272949219, -18.66632080078125, 78.1368408203125, 126.12107849121094, -205.53326416015625, 20.2528076171875, 262.4606628417969, 209.3108367919922, 58.76788330078125, -65.0015869140625, 352.1365966796875, 13.587793350219727, 121.18905639648438, -13.612701416015625, 65.19418334960938, -149.47232055664062, -140.03036499023438, -171.18260192871094, 67.41960144042969, -85.09368896484375, 183.25320434570312, -9.141014099121094, -213.14161682128906, 23.756500244140625, 276.17120361328125, 51.75555419921875, -105.45724487304688, -3.683483123779297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000341.npy"}
{"epoch": 0.7141361256544503, "step": 342, "batch_size": 128, "mean": 62.61082458496094, "std": 96.73596954345703, "min": -219.60061645507812, "p10": -54.80005111694336, "median": 52.6882438659668, "p90": 199.19605255126953, "max": 292.4520263671875, "pos_frac": 0.765625, "sample": [15.4727783203125, 20.511581420898438, 24.50927734375, 62.005035400390625, 34.501220703125, 116.40866088867188, -26.559539794921875, 79.08987426757812, -86.14575958251953, 232.80328369140625, -49.7294921875, -24.399879455566406, 237.61553955078125, 22.472259521484375, 52.101707458496094, 27.6798095703125, 211.68394470214844, -82.95684814453125, 13.26904296875, 0.0, 120.94259643554688, 83.44037628173828, 57.59215545654297, -46.657196044921875, 6.5933837890625, 45.467529296875, 53.46360778808594, 94.53057861328125, -12.37884521484375, 201.876220703125, 37.885650634765625, 185.87896728515625, 115.70529174804688, 186.7889404296875, 33.560089111328125, -153.07443237304688, 69.46566009521484, 76.96501922607422, -58.10173034667969, 74.42474365234375, 215.1689453125, 62.510223388671875, 32.4071044921875, 87.69593048095703, -2.13427734375, 196.85992431640625, 41.03120422363281, 123.12092590332031, 40.331573486328125, 33.73602294921875, 117.08798217773438, 22.010433197021484, 57.78759765625, 43.642791748046875, -3.1855926513671875, 212.86544799804688, -66.2716064453125, 40.7532958984375, -219.60061645507812, -15.18731689453125, 102.02456665039062, 7.594024658203125, 95.7254638671875, 108.33609008789062, -30.573333740234375, 71.00662231445312, 76.39398193359375, 55.777587890625, 30.451839447021484, 169.76910400390625, 9.783416748046875, -11.871734619140625, -54.23731994628906, 99.28533935546875, 37.548614501953125, -104.67840576171875, 57.59564208984375, 14.89434814453125, 205.94692993164062, 197.195068359375, 169.25875854492188, 8.861711502075195, -19.348533630371094, 40.155517578125, 105.06155395507812, 117.56033325195312, -23.4849853515625, 47.83842468261719, 180.02792358398438, 4.405252456665039, 144.7043914794922, 95.291259765625, 23.7799072265625, -6.893440246582031, -133.99600219726562, 247.28033447265625, -150.00045776367188, 123.48348999023438, 140.3114013671875, 210.81033325195312, -65.59527587890625, -53.154388427734375, -157.69970703125, 61.437652587890625, 120.38214111328125, 110.30857849121094, 42.00836944580078, 217.90093994140625, 69.459716796875, 292.4520263671875, 152.33480834960938, 198.0474090576172, 160.1243896484375, 33.98717498779297, 190.24288940429688, 15.680459976196289, 126.86761474609375, -56.11309051513672, 272.01226806640625, 160.24789428710938, 22.88104248046875, 53.2747802734375, 27.441307067871094, 165.56524658203125, 165.39404296875, -58.41719055175781, 225.58428955078125, -16.87615966796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000342.npy"}
{"epoch": 0.7162303664921466, "step": 343, "batch_size": 128, "mean": 59.822303771972656, "std": 97.10031127929688, "min": -172.19308471679688, "p10": -58.505001831054685, "median": 58.529693603515625, "p90": 189.1994201660156, "max": 289.01898193359375, "pos_frac": 0.65625, "sample": [-97.24954223632812, 123.04348754882812, 153.03933715820312, 2.6652774810791016, 20.199554443359375, -27.84416961669922, -64.89862823486328, 166.73983764648438, 81.87174224853516, 49.83526611328125, -31.587608337402344, -41.033485412597656, 122.78047180175781, 270.87152099609375, -48.879730224609375, 80.45697784423828, 87.82856750488281, 200.5035400390625, -83.49404907226562, 255.12661743164062, 130.45156860351562, 191.36410522460938, 131.93136596679688, 78.99468994140625, 62.54388427734375, 91.67002868652344, 51.12931823730469, 61.94793701171875, 168.92788696289062, -14.7608642578125, -29.748504638671875, -34.78900146484375, 7.9451904296875, 84.492431640625, 69.199951171875, -14.7093505859375, -27.7481689453125, 223.61480712890625, -2.220062255859375, 131.28594970703125, 138.51123046875, 35.218597412109375, 170.77511596679688, 75.44921875, -10.429527282714844, -27.9647159576416, 175.24072265625, -85.64422607421875, 34.7525634765625, -29.5667724609375, -41.23613739013672, -57.8211669921875, 182.66500854492188, 163.41741943359375, 32.786407470703125, 160.15130615234375, 89.65423583984375, 117.44239044189453, 32.618499755859375, 81.27081298828125, -6.0611724853515625, -33.983154296875, 136.63021850585938, 188.27169799804688, -39.617950439453125, 59.55859375, 199.164794921875, -61.168304443359375, 72.4144287109375, 221.84588623046875, 55.546630859375, 74.07061767578125, 85.37677001953125, 101.98515319824219, 30.58294677734375, 38.453338623046875, 70.75325012207031, 86.6571044921875, 18.140396118164062, 74.806640625, 81.45230102539062, -24.234848022460938, -50.55933380126953, 118.99341583251953, -141.62664794921875, 183.08453369140625, -5.792022705078125, 102.15450286865234, -75.15194702148438, -64.7529296875, 8.526058197021484, 289.01898193359375, -60.92820739746094, -11.632827758789062, 82.69564819335938, -4.065216064453125, -23.911895751953125, -22.78619384765625, 20.7564697265625, 226.1337432861328, 0.0, -66.3790283203125, 263.0121765136719, 180.92271423339844, 15.95904541015625, 183.27688598632812, 75.59646606445312, -172.19308471679688, 151.49810791015625, 57.50079345703125, 27.905319213867188, -3.715728759765625, -26.231201171875, 231.87738037109375, 122.04879760742188, 85.97702026367188, -60.100616455078125, -111.16177368164062, 85.85676574707031, 176.71011352539062, 277.0565490722656, 211.0428466796875, 34.2174072265625, 31.031692504882812, -19.89208984375, 142.3653106689453, -29.95587158203125, -26.561553955078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000343.npy"}
{"epoch": 0.7183246073298429, "step": 344, "batch_size": 128, "mean": 64.38751220703125, "std": 101.5553970336914, "min": -205.36630249023438, "p10": -58.60226821899414, "median": 66.48898315429688, "p90": 192.68802642822266, "max": 275.33514404296875, "pos_frac": 0.7109375, "sample": [126.08041381835938, -14.672882080078125, 186.02261352539062, 22.810760498046875, 115.30010986328125, 36.09846496582031, -162.42794799804688, 58.54951477050781, 120.25381469726562, 88.17152404785156, -185.95474243164062, 159.84585571289062, 139.74630737304688, 102.339599609375, -15.963165283203125, -90.64019012451172, 255.23353576660156, -17.743728637695312, -37.62353515625, 246.79498291015625, 195.7789306640625, -1.3352813720703125, 38.695526123046875, -95.815673828125, 103.39559936523438, 10.9189453125, 47.49078369140625, 141.01828002929688, -52.435028076171875, 251.629150390625, 150.10263061523438, 20.971466064453125, 11.75518798828125, 128.41873168945312, 252.1414794921875, 97.53079223632812, 162.55014038085938, 117.81702423095703, 48.065185546875, 1.8843536376953125, -60.992950439453125, 120.34578704833984, -39.9185791015625, 98.60232543945312, 86.7232666015625, -118.66116333007812, 96.9019775390625, -26.160568237304688, -25.8516845703125, -91.78253173828125, -5.5284423828125, 54.957183837890625, -25.020061492919922, 110.54141235351562, -69.102783203125, 179.358642578125, 13.081817626953125, -33.38648986816406, -64.71894836425781, 195.04241943359375, 104.12515258789062, -57.57769012451172, 122.84310913085938, 150.8636474609375, 151.0248260498047, 29.028564453125, 120.28804016113281, -2.949066162109375, -205.36630249023438, 77.14828491210938, 84.1039047241211, -112.09597778320312, 275.33514404296875, 152.19012451171875, -37.79261016845703, 208.41574096679688, 169.8104705810547, -23.45526123046875, 143.94076538085938, 172.76837158203125, 88.46417236328125, 256.9761657714844, 12.848846435546875, 151.48910522460938, -2.432464599609375, 8.425048828125, 162.93890380859375, 78.35507202148438, 29.490631103515625, 34.18107604980469, 232.44635009765625, 54.3568115234375, 267.1950378417969, 60.10038757324219, 105.2947998046875, 135.6067657470703, 20.631332397460938, -46.26238250732422, 145.97933959960938, 222.54266357421875, -14.814720153808594, 29.599105834960938, -42.4150390625, 2.2745437622070312, 144.34506225585938, 18.417236328125, 134.2428436279297, -36.067138671875, 191.6790008544922, 79.28297424316406, 252.81072998046875, 110.12405395507812, 39.49114990234375, 75.20474243164062, -17.2041015625, 77.54476928710938, 72.87757873535156, -64.3280029296875, 53.796630859375, 16.624855041503906, 121.46710968017578, -33.372161865234375, 43.998268127441406, 137.5386962890625, 101.68521118164062, -19.07152557373047, 191.63760375976562, -124.27496337890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000344.npy"}
{"epoch": 0.7204188481675393, "step": 345, "batch_size": 128, "mean": 55.19048309326172, "std": 90.7862548828125, "min": -143.88522338867188, "p10": -55.34515380859375, "median": 45.03551483154297, "p90": 171.74169158935547, "max": 323.71136474609375, "pos_frac": 0.7421875, "sample": [29.25555419921875, 67.44326782226562, 227.49984741210938, -86.24404907226562, -6.3325653076171875, -33.57606506347656, 13.73468017578125, 89.57574462890625, 323.71136474609375, 49.53672790527344, 22.370025634765625, -14.753265380859375, 246.27252197265625, 146.89044189453125, 149.857177734375, 175.64117431640625, 18.993453979492188, 50.604034423828125, -143.88522338867188, 34.45109176635742, 204.1497344970703, 197.55218505859375, 54.19248962402344, -17.174362182617188, 118.82135009765625, 292.6219482421875, 71.39289855957031, -138.06231689453125, -7.279510498046875, 69.32252502441406, 153.26678466796875, 169.037353515625, 207.4669189453125, 117.67919921875, 66.469970703125, -37.751617431640625, 63.96807861328125, 145.24627685546875, 23.85418701171875, 65.08889770507812, 70.47433471679688, -46.86309814453125, 171.3076629638672, 131.5007781982422, 101.37054443359375, 73.47108459472656, 91.72319030761719, 103.13223266601562, -29.472381591796875, -76.24749755859375, 133.33230590820312, 157.39389038085938, 138.837890625, -54.6429443359375, 117.86311340332031, -57.82708740234375, 43.075103759765625, 138.65798950195312, -120.93792724609375, 229.8023681640625, 49.6138916015625, 174.40628051757812, -0.4117012023925781, 21.765377044677734, 14.33245849609375, -1.2267684936523438, 10.286933898925781, -6.75555419921875, 69.56317138671875, -1.375213623046875, -29.71002960205078, 68.95037841796875, -24.529455184936523, 52.8858642578125, 157.69454956054688, 172.75442504882812, 26.95721435546875, -63.866302490234375, 20.383697509765625, 32.585548400878906, 68.0960693359375, 59.35430908203125, 46.99592590332031, -59.63946533203125, 34.00115966796875, 97.16604614257812, -26.57535171508789, 40.899139404296875, 134.40057373046875, 85.2085189819336, 297.70111083984375, 132.58773803710938, 25.336883544921875, 58.16822814941406, -11.190422058105469, 3.9459667205810547, 2.94061279296875, 30.109176635742188, 16.18048095703125, -56.983642578125, 68.87588500976562, 10.64996337890625, 120.61721801757812, 9.196035385131836, -97.24118041992188, 15.157035827636719, 14.480865478515625, 8.235076904296875, 136.66949462890625, 61.82586669921875, -50.24359130859375, 39.254119873046875, 248.0767822265625, -93.47313690185547, 12.756103515625, -45.077049255371094, 50.30426025390625, -66.5423583984375, 26.358963012695312, 150.14715576171875, 57.135345458984375, -58.076446533203125, 1.0447998046875, 1.5329818725585938, 146.63748168945312, 0.121826171875, 121.7105941772461, -45.620635986328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000345.npy"}
{"epoch": 0.7225130890052356, "step": 346, "batch_size": 128, "mean": 70.19441986083984, "std": 106.70846557617188, "min": -251.42413330078125, "p10": -43.78768615722656, "median": 61.85563659667969, "p90": 201.32662963867188, "max": 333.5601806640625, "pos_frac": 0.7578125, "sample": [-42.947998046875, -6.906749725341797, -12.558048248291016, -118.5841064453125, 15.661283493041992, 44.387054443359375, -69.50173950195312, 257.69354248046875, -15.371627807617188, 128.5925750732422, 101.33062744140625, 180.8150634765625, 15.917236328125, 145.39544677734375, 113.2154541015625, 45.294952392578125, 35.56385803222656, 63.106414794921875, 226.75625610351562, -23.71429443359375, 128.80076599121094, -251.42413330078125, 45.26990509033203, 216.49383544921875, 17.76275634765625, -197.26846313476562, 207.26654052734375, 45.775421142578125, -23.63690185546875, 22.403419494628906, 173.70822143554688, 318.51458740234375, 35.132843017578125, 98.5670166015625, 247.95742797851562, 200.55657958984375, 137.02508544921875, 88.19955444335938, 0.7409992218017578, 142.4891357421875, 33.052337646484375, 137.18626403808594, 47.46891784667969, -14.400146484375, 159.055419921875, -68.26232147216797, 73.45034790039062, 120.36326599121094, 141.11788940429688, 128.29086303710938, 134.30032348632812, 91.98895263671875, -8.726293563842773, 22.137832641601562, 37.07696533203125, 192.77224731445312, -16.45256805419922, 3.9678878784179688, -38.03126525878906, 20.13470458984375, 51.764739990234375, 126.56918334960938, 4.87835693359375, 69.16586303710938, -25.14482879638672, -23.185562133789062, 211.607177734375, -24.829727172851562, 97.04962158203125, 8.082687377929688, -178.28125, -37.63897705078125, 18.10537338256836, 194.71121215820312, 88.72808837890625, 197.95941162109375, 333.5601806640625, 163.2841339111328, -1.70062255859375, 118.35986328125, 29.671207427978516, 265.5101318359375, 277.01812744140625, 203.1234130859375, 73.93844604492188, 156.91526794433594, 109.99867248535156, 219.2601318359375, -102.50550842285156, 112.3040771484375, 134.351318359375, 137.162109375, 166.587158203125, 178.95449829101562, 30.9324951171875, 165.30203247070312, 60.6048583984375, 30.292388916015625, 151.777587890625, -43.953948974609375, -207.86561584472656, 37.97503662109375, 1.007232666015625, -43.7164306640625, -39.754547119140625, -147.0404815673828, 36.200836181640625, 73.60411834716797, 175.73916625976562, 145.70355224609375, 257.6690673828125, 20.256622314453125, -48.27622985839844, 156.2760009765625, 89.77462768554688, 78.37322998046875, 4.5962677001953125, 118.05038452148438, 0.0, 111.32966613769531, -52.36589813232422, 147.02334594726562, 20.897315979003906, -71.04557800292969, 145.79086303710938, 60.05877685546875, 34.343204498291016, 195.0208740234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000346.npy"}
{"epoch": 0.724607329842932, "step": 347, "batch_size": 128, "mean": 56.95897674560547, "std": 95.72866821289062, "min": -196.20590209960938, "p10": -58.89188232421875, "median": 57.20502471923828, "p90": 179.8244415283203, "max": 334.08111572265625, "pos_frac": 0.765625, "sample": [52.100547790527344, 194.96142578125, -0.7558746337890625, 59.57525634765625, 178.29019165039062, 31.92822265625, 217.576171875, 30.597679138183594, -130.36007690429688, -160.17413330078125, 183.40435791015625, 279.44537353515625, 63.24365234375, -85.5965576171875, 0.580841064453125, 44.135009765625, 87.2685546875, 107.71437072753906, 164.98031616210938, 97.57809448242188, 67.29522705078125, 6.9121246337890625, 116.53741455078125, 39.72869873046875, 13.978187561035156, 129.4744873046875, 334.08111572265625, 13.968536376953125, 66.07257843017578, 223.5919952392578, -14.70123291015625, 98.20085144042969, -11.731689453125, 24.065162658691406, 13.151283264160156, 185.88055419921875, 122.6749267578125, 14.967750549316406, 77.61813354492188, -24.595001220703125, 56.07618713378906, -6.236846923828125, 144.74893188476562, 11.587532043457031, 75.24014282226562, 66.15249633789062, -52.79937744140625, 80.91455078125, 149.41143798828125, 129.06732177734375, 74.42294311523438, -52.23345947265625, -86.34197998046875, -121.12844848632812, 199.83004760742188, 175.373291015625, 158.42636108398438, -117.09182739257812, 136.41650390625, 190.40679931640625, 10.931381225585938, 8.758636474609375, 47.39695739746094, 70.352294921875, 78.02749633789062, 107.5523681640625, -196.20590209960938, 44.44232177734375, 14.9969482421875, 228.6722412109375, 236.28785705566406, 47.6783447265625, -29.28418731689453, 58.3338623046875, 253.71112060546875, 69.73822021484375, 53.443115234375, 177.2566680908203, 169.5427703857422, -43.2376708984375, -168.85231018066406, 137.9470977783203, 171.27145385742188, 6.295200347900391, 0.0, 18.125244140625, 24.209564208984375, 80.05166625976562, 62.27093505859375, 78.65985870361328, -105.14471435546875, 43.02081298828125, 106.51248168945312, 195.19284057617188, 94.759033203125, 83.46380615234375, 115.3016357421875, 24.79779052734375, 175.22604370117188, -138.26329040527344, 65.52484130859375, 27.22698974609375, 71.6774673461914, 7.82244873046875, 3.6840362548828125, 18.794998168945312, -61.60369873046875, 61.36370849609375, 94.08660888671875, -7.84906005859375, -2.944751739501953, 9.580472946166992, -108.63223266601562, 8.632438659667969, -12.483673095703125, -15.12127685546875, 153.09835815429688, 164.24285888671875, 116.43698120117188, 19.81145477294922, -50.36590576171875, 99.01078033447266, 46.95439147949219, -65.49053955078125, -1.45147705078125, -57.72967529296875, 106.39483642578125, 60.930633544921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000347.npy"}
{"epoch": 0.7267015706806282, "step": 348, "batch_size": 128, "mean": 51.12578582763672, "std": 96.56069946289062, "min": -219.71218872070312, "p10": -43.550663757324216, "median": 29.555389404296875, "p90": 185.50588989257812, "max": 396.795166015625, "pos_frac": 0.6875, "sample": [48.603271484375, 74.31314086914062, 2.3442020416259766, -0.208984375, -5.4247894287109375, 10.528999328613281, 123.0262451171875, 8.4901123046875, 240.53176879882812, 60.34912109375, 24.406631469726562, 0.0, 54.11927795410156, 153.12896728515625, -16.145355224609375, 63.93133544921875, 4.422479629516602, 70.39138793945312, 141.4130859375, 19.49266815185547, 95.2349853515625, -7.839103698730469, -18.242462158203125, 40.458770751953125, 172.7001953125, 40.6024169921875, 128.46755981445312, 177.4766387939453, 26.786575317382812, -25.254852294921875, -97.003173828125, 60.58815002441406, 4.577392578125, -13.6654052734375, 204.05203247070312, -15.723953247070312, 166.5382537841797, 25.38738250732422, 166.85452270507812, 164.14495849609375, 35.1956787109375, 153.09304809570312, 32.059417724609375, 8.32156753540039, 42.43975830078125, 109.53330993652344, -19.829193115234375, 44.364288330078125, 28.13427734375, 6.82977294921875, 215.2860107421875, -20.9024658203125, 99.14682006835938, 8.1707763671875, -11.461227416992188, 12.996484756469727, -7.0389404296875, 255.88494873046875, -65.82620239257812, -50.4197998046875, 307.3485412597656, 52.33465576171875, 31.074234008789062, 37.012420654296875, 162.9871063232422, 171.73614501953125, -129.57037353515625, 188.1644287109375, 4.856744766235352, 40.3486328125, -99.355712890625, 21.30023193359375, -26.4847412109375, 254.64288330078125, -20.67633056640625, -0.5255126953125, 104.16482543945312, 230.05996704101562, 195.5558624267578, -87.98115539550781, -137.15505981445312, -10.258384704589844, 81.77174377441406, -51.719024658203125, 0.4084320068359375, 9.499359130859375, 116.25970458984375, 122.95306396484375, -80.89828491210938, -21.36309814453125, 60.311187744140625, 105.01564025878906, 11.50726318359375, 3.91943359375, -55.89552688598633, 3.082183837890625, 86.64520263671875, 34.728546142578125, 35.117340087890625, 15.51593017578125, -16.07574462890625, 190.0613250732422, 40.67755126953125, -45.89646911621094, -9.749176025390625, 184.36651611328125, 136.71481323242188, -3.4648170471191406, -33.478790283203125, -41.48248291015625, -42.545318603515625, 81.91603088378906, 25.535430908203125, -54.74308776855469, 39.82769775390625, 30.97650146484375, 25.159423828125, 269.41265869140625, 44.50273132324219, -38.682586669921875, 72.99609375, -21.627464294433594, 196.80828857421875, -22.5960693359375, 178.16482543945312, 163.96847534179688, -219.71218872070312, 396.795166015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000348.npy"}
{"epoch": 0.7287958115183246, "step": 349, "batch_size": 128, "mean": 76.02519989013672, "std": 106.44974517822266, "min": -255.22674560546875, "p10": -40.270703125, "median": 64.11119842529297, "p90": 216.95302124023436, "max": 402.96490478515625, "pos_frac": 0.796875, "sample": [67.92391967773438, 92.44062805175781, 49.048431396484375, -173.57965087890625, 116.97970581054688, -109.14923095703125, 91.94522094726562, 6.277168273925781, 155.7158203125, 21.4154052734375, 175.81390380859375, 216.75241088867188, 151.82943725585938, 81.07571411132812, -36.12127685546875, -7.23369026184082, 159.8875732421875, 42.41351318359375, 112.47952270507812, 290.5146179199219, 229.63156127929688, 97.42617797851562, 26.098724365234375, 13.539321899414062, -8.19207763671875, 167.7709197998047, 223.48858642578125, 121.63385009765625, -10.197906494140625, 249.08013916015625, 133.75390625, 217.42111206054688, -42.868896484375, 15.338760375976562, 322.5003356933594, 67.70892333984375, 178.16635131835938, -46.75982666015625, 30.916404724121094, 133.83740234375, 135.77099609375, 59.26434326171875, -31.8670654296875, -37.95208740234375, -37.41058349609375, 214.70620727539062, 205.21505737304688, 62.61332702636719, 55.9561767578125, 2.65264892578125, 69.44671630859375, -108.9993896484375, 57.4384765625, 75.82240295410156, 1.13873291015625, 158.11297607421875, 168.224609375, 35.461029052734375, 106.91212463378906, 65.60906982421875, 150.08984375, -53.45599365234375, 290.63580322265625, 402.96490478515625, 49.32068634033203, 182.90301513671875, 74.82818603515625, -143.62100219726562, 27.079235076904297, 30.112396240234375, 150.54495239257812, 104.38215637207031, -13.160064697265625, 9.354736328125, -84.44146728515625, 14.763496398925781, 8.350387573242188, 54.13433837890625, -108.494140625, 97.43695068359375, -39.62115478515625, 267.7052001953125, 25.1016845703125, 108.02751159667969, -255.22674560546875, 57.210693359375, 7.0648040771484375, 2.726757049560547, 299.0982971191406, 19.999847412109375, 74.96539306640625, 113.099853515625, -5.648895263671875, 60.320068359375, 83.376220703125, 80.71823120117188, 71.07542419433594, 133.7657470703125, 51.809478759765625, 150.62075805664062, 33.878196716308594, 141.6434326171875, 142.00677490234375, 277.6843566894531, 224.8338623046875, 178.8623046875, 13.025726318359375, 154.49887084960938, 0.0, 184.74911499023438, -41.78631591796875, 138.42376708984375, -48.043487548828125, 168.34051513671875, 14.748878479003906, 43.954978942871094, 1.921722412109375, 19.56085205078125, 41.920501708984375, 163.90435791015625, 222.00326538085938, 42.807533264160156, -37.86479949951172, 113.99591064453125, -118.9425048828125, -16.531051635742188, 49.64593505859375, 187.22579956054688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000349.npy"}
{"epoch": 0.7308900523560209, "step": 350, "batch_size": 128, "mean": 64.72966766357422, "std": 106.4380111694336, "min": -191.12704467773438, "p10": -64.39685668945312, "median": 65.07182312011719, "p90": 210.6261749267578, "max": 328.8424987792969, "pos_frac": 0.7265625, "sample": [150.31402587890625, 120.74247741699219, 9.090080261230469, -28.092666625976562, 23.18665313720703, 16.291114807128906, -146.0462188720703, 66.4525375366211, 167.19229125976562, 241.55307006835938, 154.39901733398438, -76.2166976928711, -108.97628784179688, 40.319427490234375, 127.15864562988281, 113.87811279296875, -25.374679565429688, -24.705398559570312, 122.09088134765625, 109.47340393066406, 55.28802490234375, 121.66239929199219, 271.63006591796875, 12.949951171875, 204.86093139648438, 210.80764770507812, 38.09489440917969, 86.255615234375, 86.50173950195312, 203.4774169921875, -34.87214660644531, 134.17999267578125, 52.97927474975586, 70.86308288574219, -83.45112609863281, 174.3421630859375, 68.45774841308594, 32.403053283691406, 82.59765625, 215.96453857421875, 184.40103149414062, 41.71760559082031, -13.184906005859375, 64.94677734375, 328.8424987792969, 137.23660278320312, 41.02978515625, 120.32366943359375, -117.71038818359375, 19.206035614013672, 129.28488159179688, 71.41677856445312, 73.20628356933594, -180.94207763671875, 69.77816772460938, -52.40639877319336, 65.02352905273438, 83.607421875, 17.44140625, 231.99639892578125, -38.983734130859375, 87.68407440185547, 299.00347900390625, -116.8868408203125, -73.75428009033203, 87.92665100097656, 72.06317138671875, 11.132339477539062, 14.036346435546875, 221.32554626464844, 31.993408203125, -100.63870239257812, 68.46687316894531, 24.227783203125, 246.3287811279297, 9.0186767578125, 271.30023193359375, 150.97601318359375, 153.0640869140625, 263.34197998046875, 26.7314453125, -18.01904296875, -27.891098022460938, 119.65438842773438, -191.12704467773438, 215.4436798095703, -67.50408935546875, 16.622955322265625, -4.2738494873046875, -56.40423583984375, 132.85855102539062, 173.11143493652344, 121.59507751464844, 10.6431884765625, -25.02301025390625, -161.69900512695312, 186.50576782226562, -5.67022705078125, 52.686279296875, 248.2440185546875, 190.41561889648438, 49.44207763671875, -11.868026733398438, 95.57936096191406, 210.54840087890625, 209.49993896484375, 196.48721313476562, 15.36590576171875, -43.30328369140625, -63.065185546875, 107.07254028320312, -34.690277099609375, -44.631492614746094, -22.689208984375, 15.0858154296875, 95.34126281738281, -34.656982421875, 102.6929931640625, -128.43966674804688, 91.88868713378906, 65.1201171875, 64.37413024902344, 131.36663818359375, -28.5625, 141.40821838378906, -41.52862548828125, 97.70704650878906, 58.38787841796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000350.npy"}
{"epoch": 0.7329842931937173, "step": 351, "batch_size": 128, "mean": 43.770713806152344, "std": 110.5921401977539, "min": -262.000732421875, "p10": -87.87012557983398, "median": 33.24176025390625, "p90": 198.80136871337885, "max": 337.903564453125, "pos_frac": 0.6484375, "sample": [0.0, 182.6429443359375, 2.203338623046875, 273.88018798828125, 18.959259033203125, -20.47649383544922, 85.5071029663086, -35.22071838378906, 9.565183639526367, 192.03656005859375, -92.31829833984375, 39.83026123046875, -27.104278564453125, -100.27655029296875, -106.74801635742188, -45.021759033203125, 160.3707733154297, -184.52947998046875, 143.5047149658203, 57.137237548828125, 216.23300170898438, -20.39276123046875, 22.493728637695312, 60.29986572265625, 0.0, 8.44500732421875, 95.64376831054688, 97.85861206054688, -24.86810302734375, 34.546478271484375, 37.31566619873047, 154.43222045898438, 31.049224853515625, 214.58592224121094, 38.74299621582031, 130.5231170654297, -1.6712474822998047, 187.93341064453125, -87.85871124267578, -24.8829345703125, 31.937042236328125, 57.60716247558594, -57.293426513671875, 275.353271484375, -12.478790283203125, 36.76979064941406, -47.97357177734375, 2.7156982421875, 115.37399291992188, -111.25105285644531, 22.14630126953125, 109.84319305419922, -67.8975830078125, -164.52935791015625, 74.48514556884766, 70.39022064208984, 132.61428833007812, -0.2980842590332031, 4.0754852294921875, 236.69107055664062, 58.62181854248047, -55.5828857421875, -93.89111328125, -82.89852905273438, 154.3848876953125, 21.83135986328125, 67.87608337402344, -87.89675903320312, -107.852294921875, 23.695343017578125, 60.271480560302734, 101.6995849609375, 258.48931884765625, 30.417495727539062, -4.305318832397461, -8.5841064453125, 150.75277709960938, 10.25152587890625, -30.593109130859375, 22.539154052734375, -163.92230224609375, -68.08897399902344, 293.7268371582031, -262.000732421875, 40.900909423828125, 45.05029296875, 259.19451904296875, 24.436431884765625, 76.48822021484375, -18.48492431640625, 73.9552001953125, 15.810501098632812, 66.58172607421875, 122.90841674804688, -251.945556640625, 86.60010528564453, 43.033905029296875, 14.999069213867188, 242.6552276611328, -73.31298828125, 15.40789794921875, 109.52066040039062, 37.01795196533203, 76.14994812011719, -34.31695556640625, 55.41755676269531, 337.903564453125, 161.57229614257812, 185.45291137695312, 170.35690307617188, 35.25896453857422, 249.042236328125, 90.63018798828125, -19.57025146484375, -31.5267333984375, 92.46978759765625, 227.71890258789062, -85.53839111328125, -7.704587936401367, 130.48362731933594, 39.9818115234375, -21.937152862548828, 0.0, 131.0006103515625, -60.221588134765625, -103.5811538696289, 246.79348754882812, 84.40434265136719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000351.npy"}
{"epoch": 0.7350785340314137, "step": 352, "batch_size": 128, "mean": 76.48738098144531, "std": 106.42184448242188, "min": -190.795654296875, "p10": -55.99091186523437, "median": 80.8876953125, "p90": 220.43279418945312, "max": 313.60211181640625, "pos_frac": 0.765625, "sample": [117.95333862304688, 133.29638671875, 222.98187255859375, -82.181396484375, 169.35272216796875, 83.59364318847656, 122.42971801757812, 113.07489776611328, 64.0975341796875, 243.44732666015625, 184.00192260742188, 90.2655029296875, 49.347816467285156, 210.26971435546875, 28.571807861328125, 245.24293518066406, -190.795654296875, -129.12203979492188, 52.70088195800781, 125.85198974609375, 131.76263427734375, 76.05609130859375, 39.15301513671875, 250.3470458984375, 219.34033203125, 139.3523712158203, 141.1774444580078, 83.12792205810547, 281.70660400390625, 45.19187927246094, -103.9227294921875, -31.616378784179688, 41.258216857910156, 294.97161865234375, 66.97889709472656, 82.95884704589844, 3.48040771484375, 62.5819091796875, -4.016754150390625, 3.8928375244140625, 275.7552185058594, -37.66642761230469, -62.39434814453125, 298.11773681640625, 5.306549072265625, 91.03277587890625, 251.06137084960938, 92.90969848632812, 43.69464111328125, 51.4356689453125, 31.03150177001953, 186.936767578125, 103.15492248535156, 55.12104797363281, 248.22738647460938, 109.2257080078125, 51.5340576171875, 173.99801635742188, 82.54592895507812, -135.234130859375, 132.78680419921875, 275.847900390625, 131.26821899414062, 217.28890991210938, 105.91995239257812, -11.975555419921875, -30.30010986328125, 46.49273681640625, 107.42364501953125, -115.79010009765625, 0.3738555908203125, 117.51144409179688, 295.855712890625, -114.29241943359375, -4.687408447265625, 110.70782470703125, 64.82694244384766, 0.0, 104.83038330078125, 150.177978515625, 113.81475830078125, -10.94253158569336, -19.06675910949707, -83.38998413085938, 118.78915405273438, 25.70709228515625, 172.55506896972656, 128.98843383789062, -13.6717529296875, 127.1192626953125, -0.0916748046875, -12.534622192382812, -37.0787353515625, -93.47711181640625, 140.88385009765625, 161.93392944335938, -171.0966796875, 22.669876098632812, 313.60211181640625, 23.012588500976562, -125.71023559570312, 18.413116455078125, 0.0, 119.78970336914062, 34.643798828125, 159.444091796875, -17.112045288085938, -44.02491760253906, 136.60496520996094, 29.712753295898438, 44.249359130859375, 177.4368896484375, 168.85446166992188, 103.85306549072266, -140.3885498046875, -53.24658203125, 159.63845825195312, 31.17425537109375, 74.43890380859375, 46.515167236328125, 109.44061279296875, 79.22946166992188, 178.4046630859375, 151.61810302734375, 16.97974395751953, 94.86096954345703, 91.43384552001953, 58.18017578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000352.npy"}
{"epoch": 0.7371727748691099, "step": 353, "batch_size": 128, "mean": 62.97319412231445, "std": 102.92705535888672, "min": -200.88287353515625, "p10": -47.740087890624984, "median": 53.49724578857422, "p90": 187.08165283203124, "max": 364.6100769042969, "pos_frac": 0.71875, "sample": [132.62399291992188, 100.30950927734375, 73.00672912597656, 119.699951171875, 103.49075317382812, -95.73812866210938, 129.5771484375, -200.88287353515625, -90.21200561523438, -39.85650634765625, 72.9730224609375, -43.99371337890625, 182.7373046875, 24.143569946289062, 295.1763916015625, 58.22056579589844, -13.26971435546875, 24.052276611328125, -20.928359985351562, -56.48162841796875, -15.042081832885742, -29.0382080078125, 6.498138427734375, -16.652099609375, 123.73184204101562, -14.087593078613281, 2.082122802734375, 57.970794677734375, 105.92794799804688, 78.55926513671875, 16.029632568359375, 0.0, 56.25807189941406, 179.99343872070312, -33.59794616699219, 76.8729248046875, 314.6220397949219, 148.7222900390625, 151.33584594726562, 231.10311889648438, 10.36505126953125, 77.17887115478516, -18.011520385742188, -162.8096923828125, -176.6557159423828, 79.71690368652344, 41.020477294921875, 186.222412109375, 58.792938232421875, 34.933502197265625, 34.83699035644531, 14.0906982421875, 158.8095703125, -13.603302001953125, 47.75750732421875, 93.55089569091797, 92.74560546875, 296.198974609375, 194.30661010742188, 128.0164031982422, 120.9215087890625, 119.4893798828125, -126.35299682617188, -59.93255615234375, 66.506591796875, -6.65478515625, -5.04400634765625, 50.8997802734375, 58.00726318359375, -103.59158325195312, 76.02206420898438, 244.58099365234375, 45.251888275146484, 19.80511474609375, 184.00555419921875, 142.24627685546875, 22.992082595825195, 42.813438415527344, 70.93020629882812, 101.18144226074219, 197.52828979492188, 156.8839111328125, 220.27670288085938, 62.50140380859375, 183.18431091308594, 93.32101440429688, 212.1492919921875, 83.40792846679688, -59.05810546875, -26.218353271484375, -7.829349517822266, 28.7176513671875, 20.608154296875, -43.67669677734375, 0.1685791015625, 91.68832397460938, -88.33512878417969, 141.1295928955078, -12.640777587890625, 38.29852294921875, 80.59294128417969, -16.862548828125, 162.33358764648438, 364.6100769042969, 243.1947021484375, -17.008384704589844, 326.7752685546875, 180.12643432617188, 164.74856567382812, 17.581451416015625, -6.4897003173828125, -64.70745849609375, -35.73615264892578, 168.4586639404297, -6.068145751953125, 8.748390197753906, 152.04812622070312, -119.91795349121094, 35.16276168823242, 12.307907104492188, 150.67706298828125, 51.05433654785156, 51.24653625488281, 9.3524169921875, 55.747955322265625, 43.54730224609375, 189.0865478515625, 130.37457275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000353.npy"}
{"epoch": 0.7392670157068063, "step": 354, "batch_size": 128, "mean": 45.909793853759766, "std": 107.39115905761719, "min": -228.07550048828125, "p10": -80.50501785278318, "median": 32.0159797668457, "p90": 180.76087646484373, "max": 329.919189453125, "pos_frac": 0.6953125, "sample": [11.811485290527344, -60.585968017578125, -138.9136962890625, 184.14505004882812, -18.888568878173828, 247.35433959960938, 115.03927612304688, 22.797271728515625, -44.28717041015625, -19.330535888671875, 27.220260620117188, -23.265472412109375, -164.70904541015625, 70.49478149414062, 67.7034912109375, 253.49765014648438, 78.96971130371094, 54.90898132324219, 28.9349308013916, 306.82745361328125, -7.109764099121094, -7.466400146484375, -51.104583740234375, -68.54109191894531, 70.21124267578125, -147.55416870117188, 134.6080322265625, 51.55267333984375, -97.12862396240234, 168.55752563476562, -98.89559936523438, 113.21527099609375, 112.77951049804688, 329.919189453125, 102.10275268554688, 6.693359375, 31.843536376953125, -6.72216796875, 113.13484191894531, 217.48171997070312, 7.317007064819336, 73.21104431152344, 153.356201171875, -128.17514038085938, 118.62594604492188, 259.1904602050781, 123.1737060546875, 248.17947387695312, 17.0728759765625, -1.0656051635742188, 190.44622802734375, -20.821502685546875, 90.28147888183594, 6.82037353515625, 12.832809448242188, 27.43316650390625, -38.245452880859375, 51.779937744140625, 14.90570068359375, 179.31051635742188, -228.07550048828125, 154.42840576171875, -202.28204345703125, 16.0311279296875, -21.69329833984375, 28.986045837402344, 116.83511352539062, 83.4783935546875, 54.0662841796875, -73.380615234375, 50.63427734375, 1.1580810546875, 110.03997802734375, 34.685768127441406, -24.874534606933594, 16.926849365234375, 167.18482971191406, 67.91714477539062, 99.53204345703125, 33.972991943359375, 144.9866943359375, 22.922565460205078, 169.07626342773438, 68.34112548828125, 106.5567626953125, 98.00143432617188, -57.73828125, -59.2152099609375, 45.05363464355469, -65.99297332763672, 69.98001098632812, -161.00888061523438, 79.54580688476562, 87.31903076171875, -130.33932495117188, 32.18842315673828, 22.588768005371094, 25.684951782226562, 206.26376342773438, 84.00323486328125, 115.77862548828125, 176.0518798828125, -68.998779296875, 3.6468238830566406, 162.98074340820312, 299.3094482421875, -29.878753662109375, -26.169677734375, 94.00070190429688, -45.252410888671875, -151.91403198242188, -107.58782958984375, 19.918380737304688, -46.70415496826172, -52.29248046875, -5.374298095703125, 101.64892578125, 260.44293212890625, 154.12945556640625, 23.359556198120117, 43.837127685546875, 25.09345245361328, 105.96391296386719, 115.73492431640625, 2.0540771484375, -125.54185485839844, 191.6695556640625, 13.827484130859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000354.npy"}
{"epoch": 0.7413612565445026, "step": 355, "batch_size": 128, "mean": 71.06146240234375, "std": 117.02190399169922, "min": -357.637939453125, "p10": -41.16828727722167, "median": 59.369140625, "p90": 212.4550003051758, "max": 511.72296142578125, "pos_frac": 0.78125, "sample": [30.761749267578125, -169.58074951171875, 162.84298706054688, 86.37969970703125, 77.14181518554688, 511.72296142578125, 266.513671875, 40.62427520751953, 155.36048889160156, 207.39678955078125, -6.47747802734375, 14.00543212890625, 102.98489379882812, 32.2669677734375, 79.28570556640625, 140.79518127441406, 76.54907989501953, -171.27928161621094, 32.578392028808594, 270.83514404296875, 79.16082763671875, 144.38316345214844, -90.70028686523438, 137.62850952148438, 100.489990234375, 143.34800720214844, 4.1033935546875, -10.72967529296875, -38.89710998535156, 102.80148315429688, 172.33322143554688, 151.34942626953125, 0.84124755859375, 17.19800567626953, 58.93597412109375, 29.12738037109375, 188.87655639648438, 112.68663024902344, 2.091583251953125, 75.19526672363281, -171.28419494628906, -4.880401611328125, 16.35082244873047, 363.053955078125, 157.32122802734375, 15.532485961914062, 41.2181396484375, 51.1292724609375, 134.31539916992188, 153.22979736328125, -4.639192581176758, 122.42889404296875, 59.80230712890625, -6.468086242675781, 183.2728271484375, 124.9254150390625, 161.37525939941406, -357.637939453125, 138.97425842285156, -50.245697021484375, 292.73126220703125, 116.10111999511719, 58.689788818359375, 86.43157196044922, 165.13677978515625, 36.52117919921875, 291.51611328125, 2.5277175903320312, 154.4208221435547, 98.19158172607422, 129.83175659179688, 77.57675170898438, 9.51031494140625, 145.72799682617188, -46.46770095825195, -12.44378662109375, 28.138961791992188, 57.486541748046875, 39.0457763671875, -74.06668090820312, -146.10784912109375, 123.69534301757812, -23.54627227783203, 31.936004638671875, 316.8711853027344, 189.07437133789062, 238.97463989257812, 219.73818969726562, 85.3077392578125, 79.95021057128906, 159.63363647460938, -29.849090576171875, -31.018882751464844, 56.71044921875, 12.82769775390625, 255.798828125, 8.324310302734375, 142.40794372558594, -1.4861164093017578, 126.98216247558594, -124.17476654052734, 5.7669677734375, 89.1915283203125, 30.25110626220703, 46.1251220703125, 125.21902465820312, 46.871795654296875, -19.634658813476562, 72.9222412109375, 32.47421646118164, -81.40716552734375, -16.372650146484375, 223.81820678710938, 58.522247314453125, 241.21572875976562, 94.22198486328125, -37.33915710449219, 24.76629638671875, 39.010589599609375, 212.20005798339844, -241.8790283203125, 49.6607666015625, -0.00732421875, 32.841278076171875, 72.213623046875, -94.9801025390625, 213.04986572265625, 79.78179931640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000355.npy"}
{"epoch": 0.743455497382199, "step": 356, "batch_size": 128, "mean": 47.975608825683594, "std": 86.45143127441406, "min": -186.99819946289062, "p10": -55.4694938659668, "median": 36.349700927734375, "p90": 161.1365005493164, "max": 317.86419677734375, "pos_frac": 0.6640625, "sample": [-6.80902099609375, -38.146392822265625, 197.1205291748047, -35.03485107421875, 176.16534423828125, 75.45201110839844, 126.96165466308594, 166.275390625, 73.46171569824219, -84.62129211425781, 44.355712890625, -6.343475341796875, 203.18634033203125, 10.596811294555664, 21.981201171875, 30.83984375, 110.92051696777344, 121.00624084472656, 101.71890258789062, -21.754531860351562, 36.5877685546875, 0.0, -1.13775634765625, 116.39755249023438, 25.728790283203125, -62.78617858886719, 241.37518310546875, 155.89840698242188, 128.27545166015625, 61.44403076171875, -29.433456420898438, 93.96730041503906, 97.62054443359375, -80.85107421875, -19.2435302734375, -101.02117919921875, 88.33642578125, 148.73931884765625, 160.6355743408203, 103.3668212890625, -104.6202392578125, 104.91619873046875, 36.11163330078125, 107.66561889648438, 156.42489624023438, 58.05865478515625, -8.199151992797852, 194.992919921875, -23.492996215820312, 23.162811279296875, -54.454002380371094, 22.606155395507812, 115.01103210449219, 17.87787628173828, -9.406988143920898, 90.3692626953125, -42.66912841796875, 107.90414428710938, 163.73089599609375, 58.22858428955078, 162.30532836914062, 28.950759887695312, 180.085205078125, 115.48947143554688, 22.256254196166992, 13.238296508789062, -2.622650146484375, 19.54254150390625, -66.07595825195312, 72.0164794921875, 77.94499206542969, -8.671337127685547, -61.581512451171875, 317.86419677734375, -28.808029174804688, 18.552337646484375, -35.83796691894531, 129.00070190429688, 132.2098388671875, 49.64678955078125, 76.68170166015625, 29.507186889648438, 5.02276611328125, 73.76040649414062, -7.72014045715332, 29.27978515625, 87.41388702392578, 22.10832977294922, -24.663976669311523, -1.73858642578125, -57.83897399902344, -111.67131805419922, -119.00555419921875, 137.03321838378906, 24.864959716796875, -186.99819946289062, 58.496551513671875, 108.66473388671875, -13.751230239868164, 110.76081848144531, 70.36569213867188, 78.768310546875, -24.02105712890625, 210.61212158203125, -23.55694580078125, 117.50079345703125, 11.910125732421875, 79.650146484375, 144.15689086914062, -127.9102783203125, 97.48275756835938, 97.67974853515625, 27.16680908203125, -37.754150390625, 106.47770690917969, -49.542083740234375, -25.922988891601562, 12.24431037902832, 47.465179443359375, 124.89544677734375, -7.7585601806640625, -128.004150390625, 140.25326538085938, 65.91973876953125, 169.99859619140625, -8.430839538574219, -30.4129638671875, 180.48081970214844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000356.npy"}
{"epoch": 0.7455497382198953, "step": 357, "batch_size": 128, "mean": 70.68486022949219, "std": 108.30601501464844, "min": -198.6990966796875, "p10": -58.27577438354491, "median": 63.51258087158203, "p90": 209.1532470703125, "max": 348.98248291015625, "pos_frac": 0.703125, "sample": [72.5037841796875, 209.8486328125, -44.889198303222656, 57.40277099609375, -56.03339385986328, 18.594219207763672, 181.88519287109375, 125.17777252197266, -12.683032989501953, 232.2672119140625, 88.04917907714844, 36.11248779296875, 346.3656005859375, -45.69403076171875, 9.166213989257812, 48.500221252441406, 171.9630126953125, -9.755706787109375, 151.64012145996094, 153.3751220703125, -10.16943359375, -31.395645141601562, 0.0, 253.253662109375, -124.93128967285156, -38.62066650390625, 114.34037780761719, 27.83123016357422, 320.8240661621094, 107.83177185058594, 16.624526977539062, 87.25311279296875, 100.10194396972656, 110.44329833984375, 0.0, 27.469635009765625, -19.029159545898438, -40.366233825683594, 97.86062622070312, 95.32465362548828, 106.914794921875, 10.89501953125, 92.61416625976562, 291.45306396484375, 27.281333923339844, 60.381591796875, 67.3620376586914, 179.937744140625, 55.71855163574219, 151.70623779296875, -92.26304626464844, -101.33598327636719, -37.31666946411133, 0.0, 73.80657958984375, -44.725921630859375, 199.21456909179688, 26.688201904296875, 110.05206298828125, -63.50799560546875, -69.65322875976562, 126.82432556152344, 182.3495330810547, 125.8433837890625, 118.93052673339844, -11.564682006835938, 179.52915954589844, -104.5382080078125, 103.06221008300781, 252.234375, 66.64356994628906, 179.6666259765625, 224.5823211669922, -79.593505859375, 348.98248291015625, 191.33929443359375, 129.1517791748047, 164.28561401367188, 106.902099609375, -71.66226196289062, 9.1785888671875, 1.1748046875, 22.533248901367188, 125.11793518066406, 138.21661376953125, 24.96034812927246, 138.70132446289062, 207.562255859375, 32.235443115234375, -2.3701171875, -53.9583740234375, 73.59942626953125, 270.5907897949219, 7.149026870727539, 4.938255310058594, -23.827377319335938, -7.110527038574219, 313.06512451171875, 283.77294921875, 153.9664306640625, 89.90148162841797, -98.0482177734375, -13.72271728515625, -91.20652770996094, 168.85757446289062, 219.25579833984375, 12.95562744140625, 19.427032470703125, 162.20458984375, -63.975433349609375, 33.52342224121094, 3.19879150390625, 77.023193359375, -88.41806030273438, 32.86705017089844, -34.60906982421875, 95.30740356445312, -0.4251708984375, 208.855224609375, -198.6990966796875, 121.32328796386719, -9.969833374023438, 150.46435546875, 141.37803649902344, 196.62252807617188, -40.42280578613281, 15.206268310546875, 112.65708923339844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000357.npy"}
{"epoch": 0.7476439790575916, "step": 358, "batch_size": 128, "mean": 75.16089630126953, "std": 103.9054183959961, "min": -196.159912109375, "p10": -41.97946319580077, "median": 68.59303665161133, "p90": 213.56592407226563, "max": 289.1119689941406, "pos_frac": 0.765625, "sample": [-32.7972412109375, 5.112403869628906, 122.50953674316406, 67.97927856445312, 7.627593994140625, 99.83526611328125, 46.2178955078125, 14.985931396484375, 74.9171142578125, 235.35650634765625, 161.82933044433594, 94.060302734375, 222.1987762451172, 94.4525146484375, -75.2076416015625, -19.559669494628906, -20.29278564453125, 203.56033325195312, 61.54193115234375, 34.74940490722656, 11.394683837890625, 36.370697021484375, 8.48809814453125, 119.49214172363281, 68.39545440673828, -66.99029541015625, 90.3233642578125, 81.9471435546875, 159.75686645507812, 98.65022277832031, 169.03564453125, -64.92767333984375, 16.40960693359375, 9.564384460449219, 9.026092529296875, 146.84869384765625, 286.66864013671875, 47.38592529296875, 186.0662841796875, 176.16944885253906, -117.31578826904297, 210.1114501953125, -21.79644775390625, -48.71379089355469, -61.831825256347656, 71.55292510986328, 147.02471923828125, -9.231842041015625, -6.454315185546875, -20.17047119140625, 104.00410461425781, 53.01103973388672, 240.4765625, 31.0704345703125, 68.79061889648438, 176.36544799804688, 222.86727905273438, 34.681884765625, 170.33685302734375, 72.91741943359375, -39.09332275390625, -2.816823959350586, 1.3977508544921875, 71.30746459960938, 200.653076171875, 242.00177001953125, 24.57497787475586, 5.389244079589844, -135.4791259765625, 155.381103515625, 96.13800048828125, 235.00018310546875, 109.29022216796875, 11.606742858886719, 149.08758544921875, -14.93609619140625, 190.7459716796875, 150.5045166015625, -21.511489868164062, 277.18438720703125, 7.619804382324219, -52.35820007324219, -10.5135498046875, -188.19970703125, 170.26702880859375, 161.80490112304688, 213.98236083984375, -196.159912109375, 104.89396667480469, 175.11474609375, 9.919387817382812, 237.1910400390625, 0.0, 16.05859375, 165.81646728515625, 61.38836669921875, 136.058837890625, 61.79682922363281, 175.9130859375, 137.32437133789062, 62.72265625, 150.6783447265625, 141.03713989257812, 219.40145874023438, 213.387451171875, -1.432708740234375, 3.61572265625, 153.8338623046875, 20.3890380859375, -135.17178344726562, 197.5330810546875, 2.4759750366210938, 75.60749816894531, 66.79373168945312, -101.0882797241211, 198.73300170898438, -25.42925262451172, -147.4755859375, -20.073532104492188, 109.84535217285156, 289.1119689941406, 65.59086608886719, 119.11282348632812, 255.04150390625, 211.49441528320312, -22.8851318359375, 74.54144287109375, 42.010833740234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000358.npy"}
{"epoch": 0.749738219895288, "step": 359, "batch_size": 128, "mean": 67.45097351074219, "std": 95.62367248535156, "min": -186.2510986328125, "p10": -32.285841369628905, "median": 61.68974304199219, "p90": 181.10433959960938, "max": 413.29583740234375, "pos_frac": 0.78125, "sample": [-32.07793426513672, -166.42254638671875, 147.15618896484375, 301.919921875, -22.234039306640625, 24.880126953125, -4.863286972045898, 108.053955078125, 129.298095703125, 139.57992553710938, 25.58056640625, 24.347381591796875, 84.91537475585938, 17.40741729736328, 96.85177612304688, 57.618743896484375, 0.9866943359375, -2.5293960571289062, 62.18011474609375, 47.745391845703125, -2.59765625, 106.14889526367188, -32.770957946777344, -5.829870223999023, 35.69976806640625, 32.53297424316406, 11.364013671875, 66.72467041015625, 25.10498046875, -51.97521209716797, 20.46734619140625, 192.97348022460938, 183.62893676757812, -57.93658447265625, 15.050579071044922, 29.6131591796875, -26.677419662475586, 116.5340576171875, 112.1761474609375, 110.7606201171875, 169.68917846679688, -5.8134307861328125, 171.6695556640625, 413.29583740234375, 261.0661926269531, 0.6530723571777344, 6.1885986328125, 66.17521667480469, 172.4752197265625, 135.31781005859375, 0.53350830078125, 61.3626708984375, 34.255523681640625, 13.44012451171875, 113.54049682617188, -98.44625854492188, 163.43104553222656, 15.477973937988281, 111.47589111328125, 123.50491333007812, 203.88299560546875, 136.46701049804688, 123.22483825683594, -11.381195068359375, 38.66473388671875, 60.22027587890625, 166.16842651367188, 121.87283325195312, 25.67889404296875, -116.7216796875, 119.25445556640625, -4.70660400390625, -90.1964111328125, 135.45578002929688, -18.2540283203125, 61.7940673828125, 211.53933715820312, -76.613037109375, 143.7807159423828, 51.220703125, -49.27153015136719, 34.620758056640625, 46.23744201660156, 66.37785339355469, 99.50543212890625, -105.20089721679688, 145.3978271484375, -17.6492919921875, 89.89605712890625, 27.048446655273438, 134.81715393066406, 97.28053283691406, 7.714515686035156, 194.98931884765625, 69.0926513671875, 74.71865844726562, 82.71148681640625, -16.580745697021484, -29.413787841796875, -42.29669189453125, 107.82415008544922, 87.58558654785156, 62.822425842285156, 102.42782592773438, 176.2254638671875, -186.2510986328125, 47.04827880859375, 180.02236938476562, 200.56817626953125, 45.356689453125, -156.6314697265625, 149.70230102539062, 41.23832702636719, 96.9771728515625, 99.92292785644531, 71.21188354492188, 112.5799560546875, 361.82427978515625, 34.92579650878906, 36.79847717285156, 231.63882446289062, 131.5540313720703, 61.585418701171875, 64.93392944335938, 199.58929443359375, 46.79005432128906, 210.08871459960938, -26.661895751953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000359.npy"}
{"epoch": 0.7518324607329843, "step": 360, "batch_size": 128, "mean": 56.91771697998047, "std": 107.93402099609375, "min": -433.57012939453125, "p10": -41.83564147949219, "median": 53.497802734375, "p90": 175.10856475830076, "max": 420.208740234375, "pos_frac": 0.734375, "sample": [59.09782409667969, 19.70836639404297, -106.41725158691406, 114.50303649902344, -15.350906372070312, 90.8607177734375, 62.70855712890625, 26.11077880859375, 53.92823028564453, 69.52008056640625, -16.870269775390625, 279.17181396484375, -19.84002685546875, 48.975616455078125, -43.072967529296875, 62.32672119140625, 37.97845458984375, 91.24734497070312, 205.48345947265625, 55.575653076171875, 52.48931884765625, -300.07476806640625, 37.34228515625, 3.6618576049804688, 12.294815063476562, 147.945556640625, 22.60127830505371, 15.972393035888672, 22.57574462890625, 135.07843017578125, 18.481407165527344, -16.051055908203125, -117.01339721679688, -10.728904724121094, -56.39873504638672, 53.2122802734375, 7.982208251953125, 93.73275756835938, -64.15484619140625, 74.77755737304688, 159.20919799804688, 53.7833251953125, 33.73779296875, 177.7537841796875, 89.0118408203125, 12.843414306640625, 394.84722900390625, -9.062164306640625, 91.38200378417969, -126.7464599609375, -67.79391479492188, 150.82867431640625, 88.19302368164062, 0.0, 104.99313354492188, 61.2630615234375, -433.57012939453125, 70.418212890625, 173.9748992919922, -7.636474609375, -64.12992858886719, -177.188720703125, 0.0, 7.149681091308594, 420.208740234375, -76.7470703125, 15.109710693359375, 15.693450927734375, 192.90771484375, 96.28750610351562, 168.067626953125, 63.573638916015625, -41.30535888671875, 24.45562744140625, -9.252120971679688, 61.84722900390625, 206.52389526367188, -30.08428955078125, 135.63497924804688, 12.243072509765625, -2.678457260131836, 122.38851928710938, 19.179718017578125, 43.20105743408203, 227.56805419921875, 46.63447570800781, 60.6787109375, 295.3282470703125, -70.74322509765625, 32.68080139160156, 153.44512939453125, 72.39724731445312, 34.53422546386719, 119.89195251464844, -8.74810791015625, -9.988113403320312, 79.30072021484375, 216.678955078125, 37.5118408203125, 147.79495239257812, 86.42138671875, 58.492393493652344, -23.22393798828125, 208.20510864257812, -11.3450927734375, 149.8377685546875, 48.2283935546875, 127.01126098632812, 95.0252685546875, 150.42352294921875, 145.42611694335938, 122.80907440185547, 152.8204345703125, 205.30661010742188, 136.85165405273438, 55.31952667236328, -6.8693695068359375, 55.2135124206543, 76.04409790039062, -21.958404541015625, 254.103515625, 26.864906311035156, -1.6390209197998047, 78.2056884765625, -0.7723846435546875, 16.876365661621094, 154.37271118164062, 82.56231689453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000360.npy"}
{"epoch": 0.7539267015706806, "step": 361, "batch_size": 128, "mean": 74.851318359375, "std": 111.21549987792969, "min": -219.20932006835938, "p10": -68.72064819335938, "median": 69.52872467041016, "p90": 231.82974853515626, "max": 379.1685791015625, "pos_frac": 0.734375, "sample": [62.19978332519531, 21.4783935546875, -84.45578002929688, 86.16259765625, 126.09501647949219, 81.42908477783203, 11.327751159667969, 147.33737182617188, 18.447616577148438, 94.75946044921875, -68.45904541015625, 6.639076232910156, 58.677520751953125, -0.2168121337890625, -43.64598083496094, 103.925537109375, 103.28604125976562, -37.39874267578125, 62.1558837890625, 97.25991821289062, 33.13428497314453, 43.07177734375, 27.45564842224121, 113.66867065429688, 279.1916198730469, 135.50782775878906, 173.10452270507812, 91.34881591796875, -146.03030395507812, 213.3187255859375, 276.0404357910156, 147.34999084472656, 68.60809326171875, -28.03546142578125, 7.654415130615234, -32.89448547363281, -116.8302001953125, 218.8515625, 125.01008605957031, 121.570068359375, 220.02333068847656, 241.95355224609375, -45.36921691894531, 168.53245544433594, 117.47087097167969, 0.0, 49.555145263671875, 47.11419677734375, 117.20257568359375, -92.880126953125, 128.187255859375, -0.281158447265625, 264.44952392578125, -2.7126522064208984, 232.270751953125, 66.76651000976562, -219.20932006835938, 124.84307861328125, 28.152923583984375, -86.65234375, -2.014404296875, 231.6407470703125, 66.47156524658203, -61.404022216796875, 108.47613525390625, 39.625518798828125, -81.58353424072266, 31.111480712890625, 201.00473022460938, 293.3216857910156, -65.4854736328125, 4.131690979003906, -9.70880126953125, 173.33834838867188, 171.3821563720703, 150.93226623535156, 139.47900390625, 285.6000061035156, 5.443359375, 80.29766845703125, -19.016448974609375, 104.17889404296875, -26.510021209716797, 0.0, 141.7152099609375, 379.1685791015625, 112.24479675292969, 0.995147705078125, 256.7613830566406, 72.17707824707031, -100.09837341308594, 88.88180541992188, 93.82992553710938, -82.07936096191406, -124.52142333984375, 20.953369140625, 13.131423950195312, 162.04827880859375, -1.43408203125, 249.6796112060547, 194.317138671875, 39.517425537109375, -69.3310546875, 35.270263671875, 74.57892608642578, 258.97607421875, 190.31292724609375, 141.4373016357422, 145.63519287109375, 180.626220703125, 22.209047317504883, 240.6492919921875, 46.41986083984375, -63.696014404296875, 70.32353210449219, 68.73391723632812, 174.70025634765625, 267.712646484375, 184.46209716796875, 72.6222152709961, -75.7718505859375, 140.23086547851562, 226.98526000976562, 133.4677276611328, -56.81824493408203, -0.8018417358398438, 41.964141845703125, -165.44818115234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000361.npy"}
{"epoch": 0.7560209424083769, "step": 362, "batch_size": 128, "mean": 64.19023132324219, "std": 88.72509765625, "min": -161.44522094726562, "p10": -44.85751953125, "median": 52.56495666503906, "p90": 174.82638702392578, "max": 346.46441650390625, "pos_frac": 0.7890625, "sample": [-12.683242797851562, 111.938232421875, 49.93098831176758, 114.86431884765625, 155.22792053222656, -24.362159729003906, 95.07256317138672, 52.01446533203125, 151.3807373046875, 7.1798095703125, 168.66262817382812, 175.15992736816406, -44.06829833984375, 45.780174255371094, -46.69903564453125, 147.7840576171875, 48.87744140625, 56.46612548828125, 31.345558166503906, 87.39454650878906, 149.5529327392578, 39.54588317871094, 69.89036560058594, 42.762481689453125, -97.3101806640625, 50.1722412109375, -62.141944885253906, 121.37728881835938, 143.5867919921875, 56.66838073730469, 20.520309448242188, -161.44522094726562, 135.806396484375, 61.13800048828125, 176.2501220703125, 83.61528015136719, 34.61505126953125, -9.860225677490234, 115.74514770507812, 12.426193237304688, -10.209381103515625, 138.1136016845703, 293.481201171875, 116.23638916015625, 74.24639892578125, -9.389434814453125, 9.80844497680664, 20.22625732421875, 47.3724365234375, 140.50634765625, 346.46441650390625, 78.294921875, -40.68896484375, 109.75872802734375, 196.53802490234375, 38.07647705078125, -42.430755615234375, -31.14715576171875, -66.93351745605469, 197.58978271484375, 19.089691162109375, 237.75299072265625, 33.524444580078125, 46.263607025146484, 36.974151611328125, 147.29550170898438, 53.115447998046875, -92.04345703125, 311.828857421875, 13.93280029296875, 174.68344116210938, 137.17449951171875, 132.6611328125, 190.63583374023438, 37.06971740722656, -8.751144409179688, 75.54718017578125, 165.82315063476562, 24.72784423828125, 40.698699951171875, -27.96209716796875, 6.040069580078125, 31.23607635498047, 198.33966064453125, 33.24372100830078, -36.8289794921875, -81.11839294433594, 105.60885620117188, 18.45868682861328, 7.3240509033203125, 56.46199035644531, 74.4039306640625, -74.39837646484375, 195.984375, 83.91702270507812, 129.1949462890625, -102.32662963867188, -58.409297943115234, 125.88681030273438, 0.7611503601074219, 188.288330078125, 170.25338745117188, 129.22242736816406, 14.912200927734375, 86.97356414794922, 104.87722778320312, -40.710296630859375, -44.05785369873047, 48.3265380859375, 14.20257568359375, 54.092987060546875, 140.11712646484375, -66.43765258789062, 6.037620544433594, 148.40472412109375, 202.0492706298828, 76.45346069335938, 36.38032531738281, 122.798828125, 24.729528427124023, 55.90540313720703, 49.21617126464844, 90.0098876953125, 60.70689392089844, -75.6832275390625, -78.28384399414062, 155.58795166015625, 118.05718994140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000362.npy"}
{"epoch": 0.7581151832460733, "step": 363, "batch_size": 128, "mean": 66.35762023925781, "std": 106.26608276367188, "min": -192.90875244140625, "p10": -55.61991577148437, "median": 64.88703918457031, "p90": 182.76591186523436, "max": 450.1629638671875, "pos_frac": 0.734375, "sample": [126.76593017578125, -17.22180938720703, -10.94329833984375, 239.5479736328125, 28.410629272460938, 46.802459716796875, 130.267578125, 7.50511360168457, 83.87655639648438, 314.3147277832031, 12.943695068359375, -53.98808288574219, 72.68257141113281, 3.2593536376953125, 123.14401245117188, 31.43684196472168, 129.08636474609375, -23.003753662109375, 95.38656616210938, 91.00059509277344, 156.70318603515625, 29.95391845703125, 181.950927734375, 92.44508361816406, 6.8477935791015625, 208.40086364746094, -55.132232666015625, -20.744049072265625, -61.280181884765625, 6.5247650146484375, 32.321258544921875, 164.53561401367188, 140.0966796875, 64.292724609375, -5.110589981079102, -76.43353271484375, -3.7307815551757812, 136.7646484375, 228.57620239257812, -11.09765625, 45.404388427734375, 148.90249633789062, 194.19866943359375, -10.487335205078125, 327.0181884765625, 115.910888671875, -141.25376892089844, 95.10713195800781, 64.22195434570312, -17.166879653930664, 176.78585815429688, 6.739990234375, 36.960418701171875, 75.63775634765625, -192.90875244140625, 179.20849609375, 88.0612564086914, -109.33496856689453, -41.549072265625, -24.083251953125, 13.2066650390625, 74.42350769042969, 119.2347412109375, 97.57774353027344, -31.94879150390625, 253.23727416992188, -183.42013549804688, 32.83586120605469, -28.840713500976562, 36.917930603027344, 65.48135375976562, 140.88299560546875, 56.36016845703125, 450.1629638671875, 263.15069580078125, 154.88409423828125, 22.184539794921875, 95.90652465820312, 52.0518798828125, 270.2242431640625, 152.15167236328125, 239.95932006835938, -21.984878540039062, 83.366455078125, 269.677734375, 50.282257080078125, 77.6807632446289, 111.92697143554688, 7.22967529296875, 139.05072021484375, 152.3121337890625, -139.97482299804688, 175.1689453125, 139.75582885742188, 39.79917907714844, 73.00341796875, 162.1121063232422, 135.2166290283203, -12.912834167480469, -72.34445190429688, 147.68865966796875, 129.33087158203125, -79.1598892211914, 31.86077880859375, 65.93202209472656, -20.418052673339844, 100.76864624023438, 1.7632675170898438, 35.00567626953125, 98.08758544921875, 40.599456787109375, 152.84658813476562, -39.808837890625, 1.355926513671875, 3.256927490234375, 77.06399536132812, 171.41397094726562, 128.55130004882812, -103.30709838867188, -117.4734115600586, 184.66754150390625, -39.650535583496094, -87.34291076660156, 176.00106811523438, -46.54194641113281, 79.3641357421875, 78.15447998046875, -56.757843017578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000363.npy"}
{"epoch": 0.7602094240837697, "step": 364, "batch_size": 128, "mean": 58.35860061645508, "std": 108.98274993896484, "min": -213.44314575195312, "p10": -66.95863037109375, "median": 35.83638000488281, "p90": 192.8808853149414, "max": 331.95806884765625, "pos_frac": 0.734375, "sample": [247.2518310546875, 34.104225158691406, 130.9088134765625, 61.01243591308594, -0.91748046875, 6.191196441650391, 21.28411865234375, 41.61213684082031, 316.7627868652344, -80.27093505859375, 32.03419494628906, 191.10911560058594, -4.147857666015625, 23.58984375, 66.62741088867188, 161.856689453125, 160.51641845703125, 130.81625366210938, -53.09002685546875, 267.5279541015625, -23.133453369140625, -121.64663696289062, 94.90386962890625, 270.78692626953125, 135.600341796875, -4.531982421875, 20.76219940185547, -67.3642578125, 9.269172668457031, 189.39016723632812, 70.46161651611328, 35.80754089355469, 147.39578247070312, 64.94181823730469, 198.46859741210938, 125.53353881835938, 19.635482788085938, 28.153701782226562, 65.20027160644531, 182.73789978027344, -213.44314575195312, -27.357315063476562, 111.17488098144531, -134.12649536132812, 13.781845092773438, 256.1634521484375, 20.402793884277344, -21.452239990234375, 3.43157958984375, 42.613983154296875, 64.40814208984375, 197.0150146484375, 17.348114013671875, 109.12532043457031, -109.3448486328125, 207.4986572265625, 26.330535888671875, 112.4898681640625, 38.62200927734375, 145.24053955078125, -80.97219848632812, 55.184051513671875, 331.95806884765625, -3.3089256286621094, 108.48016357421875, -35.156829833984375, 83.42961883544922, 16.018943786621094, 112.71017456054688, -4.6038818359375, -207.42007446289062, 186.16799926757812, 94.2191162109375, -205.34005737304688, 175.8233642578125, 141.39178466796875, 185.990234375, -15.638269424438477, -211.648193359375, 297.0169677734375, 125.16156005859375, 30.09626007080078, 136.39364624023438, -6.97015380859375, 159.17471313476562, -77.04823303222656, 119.25, 53.568634033203125, 19.751846313476562, 301.65289306640625, 170.49191284179688, 10.204444885253906, 14.936386108398438, 9.66595458984375, 34.195587158203125, -23.93878173828125, -14.083892822265625, 91.6435546875, 55.8001708984375, 0.2269439697265625, 22.93175506591797, 144.2481689453125, -113.3892822265625, 19.6796875, 24.361053466796875, 133.67391967773438, 38.15618896484375, 35.86521911621094, 80.04086303710938, 10.23681640625, 19.27496337890625, -78.51957702636719, 176.005615234375, 71.51907348632812, -31.993614196777344, 204.6276092529297, -1.808837890625, -66.7847900390625, 25.178428649902344, 7.297271728515625, -44.758544921875, -29.091690063476562, 56.80524444580078, 134.66827392578125, -59.96929931640625, 180.94879150390625, 264.33770751953125, -45.19017028808594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000364.npy"}
{"epoch": 0.762303664921466, "step": 365, "batch_size": 128, "mean": 63.486080169677734, "std": 100.94078063964844, "min": -152.01809692382812, "p10": -57.76134185791015, "median": 44.60986328125, "p90": 199.88880920410156, "max": 306.8255920410156, "pos_frac": 0.703125, "sample": [0.0, 22.829700469970703, 48.658538818359375, 31.64771270751953, -69.88241577148438, 67.38021087646484, -57.508056640625, -76.7579345703125, 25.34881591796875, -22.615150451660156, 33.928466796875, 180.23867797851562, 1.0825080871582031, 178.40478515625, 22.645126342773438, -65.2930908203125, 6.840568542480469, -10.5068359375, 194.41990661621094, 140.29739379882812, -14.316505432128906, -34.4832763671875, 35.977508544921875, -1.133544921875, 266.2186279296875, 75.55400085449219, 198.40313720703125, 30.42723846435547, -10.939903259277344, -6.7387542724609375, -101.33050537109375, -79.21145629882812, 209.37017822265625, 159.99679565429688, 49.879913330078125, 66.09506225585938, 79.57855224609375, 124.72494506835938, 49.28760528564453, 86.09915161132812, -58.35234069824219, 106.8089599609375, -7.4927215576171875, 19.87358856201172, 19.98297119140625, -152.01809692382812, -0.572906494140625, 110.51803588867188, 34.157562255859375, 202.589111328125, 177.55917358398438, 68.90119934082031, 98.25843811035156, -100.71823120117188, 23.31842041015625, 9.98486328125, 73.6258316040039, 35.252227783203125, 217.2449951171875, 269.3266296386719, 162.76243591308594, 135.66488647460938, -43.05975341796875, 55.19549560546875, 0.0, 53.69293212890625, 186.973876953125, 46.9017333984375, -34.435035705566406, 126.83041381835938, 169.2613525390625, -37.99713134765625, 47.09136962890625, 141.28097534179688, -44.4224853515625, 111.00677490234375, 75.59951782226562, 306.8255920410156, 205.92327880859375, 92.12286376953125, 97.66766357421875, -28.74298095703125, -29.891326904296875, 176.99551391601562, 194.78973388671875, 0.0, 52.56452178955078, -2.2674026489257812, 60.380943298339844, -68.64234161376953, -114.99872589111328, 36.505592346191406, 35.23085021972656, -150.666259765625, 299.3221435546875, 42.3179931640625, 197.40267944335938, 263.0355529785156, 30.849441528320312, 22.32989501953125, -75.61229705810547, 240.29293823242188, -31.189666748046875, 64.08650207519531, 170.3882598876953, 240.64010620117188, 58.18030548095703, -39.56574249267578, 36.86181640625, 190.6543731689453, -49.28973388671875, 23.746826171875, 138.0303955078125, -96.434814453125, 303.52508544921875, -9.612113952636719, 42.30670928955078, 192.60736083984375, -4.08038330078125, 104.36869812011719, 2.1274261474609375, 198.73153686523438, 89.89688873291016, 89.28451538085938, 14.1466064453125, 216.72079467773438, 131.4562225341797, 29.6817626953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000365.npy"}
{"epoch": 0.7643979057591623, "step": 366, "batch_size": 128, "mean": 68.29085540771484, "std": 102.52890014648438, "min": -256.85406494140625, "p10": -44.12387542724609, "median": 59.55742645263672, "p90": 201.0018096923828, "max": 327.2788391113281, "pos_frac": 0.7265625, "sample": [25.238937377929688, 53.88301086425781, 228.82064819335938, 80.12335205078125, 100.076416015625, -68.19659423828125, 163.80618286132812, -124.02532958984375, 86.96353149414062, 95.35169982910156, -64.65188598632812, 80.25373840332031, 165.09414672851562, -24.63983154296875, 56.44134521484375, 96.60297393798828, -6.406227111816406, 195.8038330078125, 66.17901611328125, 151.22088623046875, 69.00732421875, -21.07171630859375, 150.2701416015625, 163.49305725097656, -133.92193603515625, 59.5758056640625, 212.87942504882812, 106.66357421875, 226.68679809570312, -15.93701171875, -52.16278076171875, -20.733474731445312, 166.25958251953125, 17.83984375, 224.82955932617188, 78.06539916992188, -118.68820190429688, 189.28363037109375, 47.041107177734375, -13.1864013671875, -187.06695556640625, 216.00244140625, -71.59333801269531, 144.70985412597656, 14.561737060546875, 155.00479125976562, 19.17230224609375, 126.66815185546875, 110.2707748413086, -256.85406494140625, 123.05453491210938, 313.28448486328125, 136.51028442382812, 34.6614990234375, 18.694244384765625, 179.01138305664062, -1.0909881591796875, 85.62701416015625, 38.723541259765625, 200.19467163085938, 4.154655456542969, -108.52896118164062, 98.69217681884766, 90.8360595703125, -28.138702392578125, 169.58297729492188, 204.162353515625, 97.14067077636719, 13.796703338623047, 11.2564697265625, 28.29931640625, -42.05528259277344, 76.74078369140625, 292.28472900390625, 59.53904724121094, 197.51007080078125, 50.367706298828125, 214.55438232421875, 129.65252685546875, 50.21674346923828, -35.02519226074219, 0.0, -14.823516845703125, 153.06289672851562, 131.4562530517578, 28.611061096191406, 127.55955505371094, -23.921550750732422, -0.06817626953125, 13.661598205566406, -55.96272277832031, 19.00018310546875, 153.84854125976562, 36.58380126953125, -19.452407836914062, 129.77175903320312, 87.28497314453125, -41.28533935546875, -1.8033218383789062, 137.38380432128906, -22.797027587890625, 34.24859619140625, 202.8851318359375, -17.84814453125, 239.68710327148438, 170.26559448242188, 7.516754150390625, 56.9683837890625, -23.991790771484375, 150.03781127929688, 10.856658935546875, 115.14289855957031, 136.9547119140625, 135.1656494140625, 90.3565673828125, -32.07181167602539, 327.2788391113281, 34.038299560546875, -42.40478515625, 22.382320404052734, 110.0156478881836, 115.17965698242188, 37.373477935791016, 137.05154418945312, 1.118896484375, -48.13508605957031, 314.75799560546875, -50.43115234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000366.npy"}
{"epoch": 0.7664921465968586, "step": 367, "batch_size": 128, "mean": 68.34049987792969, "std": 96.00760650634766, "min": -172.9591827392578, "p10": -41.14970092773436, "median": 59.736602783203125, "p90": 189.372216796875, "max": 350.0448913574219, "pos_frac": 0.7578125, "sample": [162.72122192382812, 148.87765502929688, -22.2196044921875, 188.91970825195312, 8.242437362670898, 41.02833557128906, 198.90707397460938, -94.7254638671875, -37.07269287109375, 135.824951171875, -17.8177490234375, 79.04666137695312, 3.7930755615234375, -29.621566772460938, 164.415283203125, 73.52468872070312, 23.239727020263672, 151.257080078125, 44.73756408691406, 98.18971252441406, -2.0013198852539062, 142.68341064453125, 115.14383697509766, 245.7529296875, 106.14031982421875, 243.89964294433594, 193.9535675048828, 240.9892578125, 60.02777099609375, -172.9591827392578, 107.31036376953125, -10.118568420410156, -29.665294647216797, 50.29052734375, 10.66628646850586, -100.17794799804688, 78.35507202148438, 52.66862487792969, 35.82835388183594, -58.425140380859375, -6.120635986328125, 190.42807006835938, 75.87734985351562, -106.34213256835938, -12.771560668945312, 30.02630615234375, 238.01956176757812, 187.13250732421875, 291.6733093261719, 83.63893127441406, 135.96929931640625, 165.28318786621094, 137.94815063476562, 350.0448913574219, 86.79975128173828, 82.77127075195312, 4.7607421875, 176.6058349609375, 152.93905639648438, 12.259033203125, 16.845062255859375, 158.91578674316406, 69.23492431640625, 114.13165283203125, -58.9053955078125, 23.603256225585938, 131.8935546875, -16.615015029907227, 186.7279052734375, -24.878517150878906, 30.74745750427246, 116.0994873046875, 11.299335479736328, 124.81744384765625, -154.8324737548828, -10.010498046875, 127.91081237792969, 65.54762268066406, -62.21942138671875, -15.78387451171875, 55.914215087890625, 112.80841064453125, 220.382568359375, 204.90316772460938, 2.7219696044921875, 22.07555389404297, 59.4454345703125, 173.4947509765625, -61.507965087890625, 14.753677368164062, 129.30955505371094, 165.659423828125, 75.251953125, 5.9180145263671875, 17.67022705078125, 28.1505126953125, 123.65484619140625, 26.686691284179688, 97.26983642578125, 177.50531005859375, 113.85174560546875, 1.7736740112304688, -127.09381103515625, 94.6617431640625, 40.63230895996094, -12.093414306640625, -59.79029846191406, 79.015625, 16.5772705078125, -50.6627197265625, 253.70144653320312, 139.81753540039062, -3.537139892578125, 173.0548095703125, 92.81559753417969, -0.354766845703125, 51.7239990234375, 99.361083984375, 6.9245758056640625, 18.379528045654297, 20.86517333984375, 5.51300048828125, 130.3111572265625, -10.050521850585938, 260.0257873535156, -67.27532958984375, -17.504669189453125, 101.7698974609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000367.npy"}
{"epoch": 0.768586387434555, "step": 368, "batch_size": 128, "mean": 84.72393035888672, "std": 89.86734771728516, "min": -142.39828491210938, "p10": -12.429774475097656, "median": 76.42208862304688, "p90": 204.32155456542966, "max": 345.8766784667969, "pos_frac": 0.8359375, "sample": [19.362548828125, 155.52627563476562, 74.86549377441406, 138.20947265625, -37.89966583251953, 134.37432861328125, 34.70819091796875, -123.61334228515625, 210.8233642578125, 54.888641357421875, 345.8766784667969, 43.64656066894531, 35.36167907714844, -142.39828491210938, 9.317230224609375, 116.71752166748047, 211.04745483398438, 102.10042572021484, -61.35443115234375, 128.87896728515625, 72.61022186279297, 97.4981689453125, 49.34989929199219, 97.46212768554688, 12.762969970703125, 48.28759765625, 156.65286254882812, 106.06561279296875, 91.91789245605469, 17.311355590820312, 44.011417388916016, 142.85887145996094, 6.321258544921875, 202.5206298828125, -29.118362426757812, -80.59178161621094, 19.077880859375, 264.30657958984375, 62.707733154296875, 104.81353759765625, -11.994308471679688, -9.46942138671875, 11.5513916015625, 12.436943054199219, 10.773414611816406, 103.40617370605469, 34.960845947265625, 4.489471435546875, 133.51715087890625, 76.31927490234375, -4.2738494873046875, 109.12533569335938, 212.63461303710938, -16.461986541748047, 339.8658752441406, 124.42864990234375, -1.3985214233398438, 112.4102783203125, 13.26055908203125, 145.4036102294922, 159.47967529296875, 156.374267578125, 85.06515502929688, 77.35604858398438, 43.626708984375, -7.4919281005859375, 280.18768310546875, -45.80921936035156, 52.02339172363281, 208.52371215820312, 128.84332275390625, 156.6326904296875, 20.46319580078125, 60.689697265625, 42.80577850341797, 14.058929443359375, 159.56243896484375, 71.44552612304688, -13.44586181640625, 96.09481811523438, 142.343505859375, 42.903350830078125, 61.3160400390625, 169.52696228027344, -73.337158203125, 145.08837890625, -51.193695068359375, 108.70370483398438, 122.32463073730469, 185.62753295898438, 40.643104553222656, -4.8562164306640625, 76.52490234375, -29.50092315673828, -38.843505859375, 177.34735107421875, 152.9772186279297, 238.74517822265625, 60.090911865234375, 115.1845703125, 25.64923095703125, -3.47137451171875, 121.97840881347656, 87.26924133300781, 1.0753936767578125, 188.759765625, 16.599807739257812, 159.14761352539062, 145.6951141357422, 102.15582275390625, 76.1593017578125, 270.0843811035156, 74.6004638671875, 17.082275390625, 163.79254150390625, 9.024642944335938, 236.91952514648438, 189.00588989257812, 38.598388671875, 129.49456787109375, 234.2489013671875, 76.98582458496094, 95.4395523071289, 61.569480895996094, 187.51202392578125, 238.8562469482422, -8.17364501953125, 182.25064086914062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000368.npy"}
{"epoch": 0.7706806282722513, "step": 369, "batch_size": 128, "mean": 62.33414077758789, "std": 92.96044921875, "min": -187.68312072753906, "p10": -31.25468063354491, "median": 49.915863037109375, "p90": 195.9572830200195, "max": 276.80657958984375, "pos_frac": 0.7421875, "sample": [60.85392761230469, 131.64239501953125, 128.78118896484375, -19.286102294921875, 115.212890625, 134.12335205078125, -8.51898193359375, 215.75607299804688, 33.7027587890625, 148.91688537597656, 43.03900146484375, 132.54151916503906, 99.40609741210938, 237.04010009765625, 15.184280395507812, 47.739166259765625, 139.91104125976562, 5.077869415283203, 63.714599609375, 41.57466125488281, 67.27534484863281, -16.225051879882812, 87.685546875, 190.9325408935547, 32.93285369873047, -144.48983764648438, -13.418106079101562, -27.86199951171875, 44.228607177734375, 134.69842529296875, 65.54393005371094, -10.34710693359375, 6.492694854736328, -11.399078369140625, 21.682106018066406, 112.9613037109375, -1.1013336181640625, 75.15374755859375, 276.80657958984375, 72.23678588867188, 169.99081420898438, 6.01446533203125, 64.39898681640625, 88.75349426269531, 48.690155029296875, -24.0594482421875, 154.50732421875, 191.95892333984375, 48.73443603515625, 6.7113037109375, 248.46832275390625, 32.72877502441406, -39.57075500488281, 210.5749053955078, 49.24481201171875, 73.90740966796875, -1.601898193359375, 126.14923858642578, 152.1693878173828, 229.37918090820312, 24.834869384765625, 143.94398498535156, 51.65142822265625, -19.8431396484375, 32.58399963378906, 47.5150146484375, 66.63763427734375, 48.36993408203125, -74.727294921875, -27.8753662109375, 173.6028594970703, -39.9873046875, -86.51763916015625, -95.55233764648438, 57.010223388671875, -26.1917724609375, -12.133125305175781, -152.8896484375, 248.23915100097656, 23.130615234375, 118.06639862060547, 3.637786865234375, 50.5869140625, 240.31500244140625, 53.398468017578125, 20.446823120117188, 246.99456787109375, 70.91082763671875, 57.6357421875, 0.0, 51.62577819824219, 211.4542694091797, 55.067352294921875, 191.21136474609375, 76.13787841796875, -7.6947479248046875, -129.17062377929688, 101.42449951171875, 220.21139526367188, 181.82952880859375, 23.190216064453125, -73.07037353515625, -8.925813674926758, 151.064453125, 153.74887084960938, 10.680130004882812, 134.07369995117188, 82.7279052734375, -27.221359252929688, 2.4091720581054688, -78.91487884521484, 205.2867889404297, 210.65191650390625, 14.63250732421875, -187.68312072753906, 121.80409240722656, 163.90283203125, -63.153472900390625, -39.139747619628906, 5.984039306640625, -22.795242309570312, 39.133575439453125, 8.721366882324219, 8.224090576171875, 125.07038879394531, 121.01997375488281, 157.155517578125, -23.048797607421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000369.npy"}
{"epoch": 0.7727748691099476, "step": 370, "batch_size": 128, "mean": 75.46737670898438, "std": 102.79446411132812, "min": -284.7041320800781, "p10": -32.047393989562984, "median": 71.99250793457031, "p90": 202.5306198120117, "max": 384.5269775390625, "pos_frac": 0.7890625, "sample": [-80.21835327148438, -102.98977661132812, 173.96487426757812, 44.31227111816406, 117.64418029785156, 71.97283935546875, 127.8355712890625, 41.91203308105469, 15.5191650390625, 210.930908203125, -11.137825012207031, 139.89462280273438, 17.85888671875, -8.336587905883789, 102.34100341796875, 1.298919677734375, 77.8969497680664, 179.53240966796875, 234.6796875, 145.50331115722656, 191.54075622558594, -6.192657470703125, 76.36495971679688, 82.70843505859375, 155.4207763671875, 50.21685791015625, 20.865753173828125, 235.24853515625, -64.33491516113281, -41.65093994140625, 35.98094177246094, 186.45013427734375, 154.3299560546875, 60.26958465576172, -31.057905197143555, -0.330810546875, 204.55126953125, 76.78935241699219, 10.575363159179688, 174.33990478515625, 206.54058837890625, 113.26455688476562, 125.5911865234375, 138.67721557617188, 39.321807861328125, 143.4104461669922, 160.86676025390625, -45.026519775390625, 155.62347412109375, 24.961408615112305, 28.631439208984375, -9.112945556640625, 47.603397369384766, 215.87576293945312, 47.384735107421875, 114.96630859375, 178.62588500976562, 183.65560913085938, -44.02897644042969, 36.3519287109375, 128.8487548828125, 19.999553680419922, 89.3236083984375, 29.060745239257812, 54.593605041503906, 0.439239501953125, 105.0823974609375, 32.861785888671875, 78.54315185546875, -167.4440460205078, 148.8674774169922, 15.065460205078125, -15.4111328125, 48.31074523925781, 7.670635223388672, 101.67630004882812, 184.4503173828125, 53.460205078125, 246.4036865234375, -284.7041320800781, 56.33917236328125, 199.63058471679688, 158.49639892578125, 6.369293212890625, -15.779708862304688, 101.73849487304688, 155.04766845703125, 132.04046630859375, -115.61293029785156, 167.48880004882812, 73.10179138183594, 105.97286987304688, 26.241973876953125, 6.932136535644531, -0.4819488525390625, 71.68513488769531, -18.048355102539062, 14.931396484375, 120.9810791015625, 43.8143310546875, 75.85739135742188, 94.97238159179688, 201.85130310058594, 102.76493835449219, -7.08673095703125, 40.70654296875, 20.16278076171875, -34.356201171875, 135.30123901367188, 218.79571533203125, -26.154518127441406, 20.196670532226562, 204.11569213867188, 374.5885009765625, 72.01217651367188, 300.11334228515625, 106.35104370117188, 113.52239990234375, 246.08157348632812, -198.12237548828125, -20.706390380859375, 117.37548828125, 384.5269775390625, 26.597938537597656, -37.079856872558594, -107.74043273925781, 145.5103759765625, -14.00543212890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000370.npy"}
{"epoch": 0.774869109947644, "step": 371, "batch_size": 128, "mean": 57.61955261230469, "std": 92.3456039428711, "min": -187.63946533203125, "p10": -45.683232116699216, "median": 43.21099853515625, "p90": 186.0521026611328, "max": 377.9938049316406, "pos_frac": 0.765625, "sample": [110.70496368408203, 95.16300964355469, 88.41461181640625, 84.1678466796875, -30.197433471679688, 3.58447265625, -185.56207275390625, 137.90240478515625, -92.389892578125, 76.87211608886719, 29.29803466796875, 54.5662841796875, -61.4901123046875, 30.51494598388672, 207.01177978515625, 156.78814697265625, 25.96514892578125, 19.70599365234375, 56.882720947265625, 15.993240356445312, 173.3734130859375, 94.95721435546875, 134.3463592529297, 227.39324951171875, -17.875579833984375, -2.92291259765625, 9.399749755859375, 189.66148376464844, 26.87109375, 80.97737121582031, -109.07437133789062, 159.528076171875, -6.1206817626953125, -5.0333099365234375, 187.50604248046875, 28.267333984375, -92.09359741210938, 20.708885192871094, 64.22755432128906, -6.21014404296875, -18.32562255859375, 194.135009765625, -22.458620071411133, 160.67169189453125, 22.2445068359375, -187.63946533203125, -76.2144775390625, 6.434356689453125, 152.34213256835938, -47.54219055175781, 377.9938049316406, 130.55157470703125, 31.202430725097656, -105.5374755859375, 128.76373291015625, 107.07501983642578, -32.12572479248047, 61.939361572265625, -108.4584732055664, 86.80661010742188, 164.9674530029297, 140.4851531982422, 239.45928955078125, 212.2281494140625, 12.997711181640625, 34.891357421875, 6.3166961669921875, -68.39079284667969, 148.12115478515625, 34.15960693359375, 53.74333953857422, -35.00567626953125, 72.42845153808594, 107.6485595703125, 7.192873001098633, 62.43505859375, 51.83599853515625, 223.23974609375, 116.41305541992188, 29.18688201904297, 185.42898559570312, -60.910400390625, 45.984619140625, 49.661033630371094, 155.72116088867188, 74.51416015625, 86.25396728515625, 41.758209228515625, 61.54718017578125, -23.311256408691406, 6.1767120361328125, 16.28387451171875, 43.806182861328125, -22.79730224609375, 8.261138916015625, 26.549644470214844, 3.634613037109375, 216.78250122070312, 33.76374816894531, 70.816650390625, 29.20635986328125, 37.2137451171875, 30.508331298828125, 0.0, -63.22566223144531, -4.36053466796875, 71.16201782226562, 237.8203125, -19.55405044555664, 48.132080078125, 42.615814208984375, 319.19219970703125, 51.20421600341797, 162.4595947265625, 104.784912109375, -10.722946166992188, 37.797096252441406, 38.14154052734375, 118.00080871582031, 24.38372802734375, 107.4921875, 62.075592041015625, 143.0098876953125, 20.87438201904297, -44.88653564453125, 79.28338623046875, 73.0302963256836, 201.74314880371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000371.npy"}
{"epoch": 0.7769633507853403, "step": 372, "batch_size": 128, "mean": 75.9412841796875, "std": 103.368896484375, "min": -232.44931030273438, "p10": -39.525313186645505, "median": 77.40592956542969, "p90": 193.37926635742187, "max": 336.3520202636719, "pos_frac": 0.765625, "sample": [99.41064453125, -0.961944580078125, 126.305908203125, -33.37570571899414, 179.76416015625, -110.05889892578125, 106.94168090820312, 22.361114501953125, 7.230522155761719, 316.3623046875, 18.331727981567383, 63.60570526123047, 41.27693176269531, 193.04556274414062, 31.979171752929688, 101.5848388671875, 6.038722991943359, 83.23100280761719, 164.35543823242188, 23.3184814453125, -45.85307312011719, 105.8709716796875, 134.9298858642578, -30.509727478027344, 158.41995239257812, 24.891250610351562, 148.38206481933594, 76.2076416015625, 137.28623962402344, -0.2183380126953125, 98.257080078125, 85.06903076171875, -6.988563537597656, 120.10931396484375, 153.24880981445312, -18.49267578125, 185.00140380859375, -3.7931060791015625, 104.6788101196289, 133.74627685546875, 323.9217529296875, 88.79383850097656, 126.79780578613281, -5.22637939453125, 2.56121826171875, -8.248779296875, 23.113494873046875, 29.388580322265625, 275.21624755859375, -7.316337585449219, 16.33770751953125, 123.4959716796875, 104.4169921875, 40.802490234375, -27.35931396484375, 15.695938110351562, -77.77049255371094, -104.85392761230469, 85.12664794921875, 95.69363403320312, -4.41291618347168, 52.6199951171875, -2.92852783203125, 198.83834838867188, 52.999847412109375, 336.3520202636719, 48.63780975341797, 164.698974609375, -7.927055358886719, 335.3982849121094, 39.22511291503906, 33.30809020996094, -124.48025512695312, 189.37368774414062, 152.89508056640625, 5.0175933837890625, 57.623870849609375, 181.67190551757812, 50.377418518066406, 106.81076049804688, 69.94970703125, 193.88140869140625, 78.60421752929688, 149.841552734375, 112.31880187988281, 125.35745239257812, -91.04037475585938, 166.82012939453125, -14.761293411254883, 84.21376037597656, -37.758758544921875, 12.290374755859375, 12.790283203125, 202.20565795898438, -43.647274017333984, 125.90892791748047, 121.45384216308594, 96.71607971191406, -49.833778381347656, 118.67486572265625, 254.0433349609375, -232.44931030273438, 151.8070068359375, 240.61154174804688, 179.6057586669922, 124.84466552734375, -47.22406005859375, -6.719024658203125, 189.28182983398438, 82.8271484375, 56.499664306640625, 72.993408203125, -97.4278564453125, 197.62620544433594, 193.1640625, -168.4541778564453, 330.95330810546875, 150.519775390625, 58.904449462890625, 255.09368896484375, 50.88331604003906, 132.47320556640625, 50.8963623046875, -116.1956787109375, 0.5465316772460938, 8.217941284179688, 101.42742919921875, 84.07135009765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000372.npy"}
{"epoch": 0.7790575916230367, "step": 373, "batch_size": 128, "mean": 67.00389099121094, "std": 89.2184066772461, "min": -257.849365234375, "p10": -40.58264389038085, "median": 60.59196472167969, "p90": 188.86860046386715, "max": 244.31561279296875, "pos_frac": 0.8125, "sample": [25.9810791015625, 17.93201446533203, 208.23480224609375, 145.65689086914062, 107.42633056640625, 47.93170166015625, 91.80819702148438, 118.2652587890625, 10.971210479736328, 63.573883056640625, 15.981185913085938, 96.673828125, 128.82171630859375, 201.81512451171875, 13.592056274414062, 126.97152709960938, 156.3939208984375, 31.725234985351562, -44.75940704345703, 182.61264038085938, 41.80564880371094, 144.1873779296875, -8.086185455322266, 10.617950439453125, -182.59996032714844, -60.318572998046875, 244.31561279296875, -75.14029693603516, 179.6323699951172, 110.07205200195312, 11.202302932739258, -64.6864013671875, 3.367725372314453, 35.678985595703125, 38.107208251953125, 149.2803955078125, 204.44647216796875, -26.571685791015625, 107.24993896484375, 121.29946899414062, 87.4144287109375, 108.90955352783203, 152.2798614501953, 88.8304443359375, -30.650375366210938, -65.26513671875, 40.04559326171875, -33.236785888671875, 61.676361083984375, 124.84931945800781, 74.15460205078125, 164.62477111816406, 135.9627227783203, 136.6240692138672, 154.03616333007812, 232.61160278320312, 99.68232727050781, 212.02386474609375, 138.06602478027344, 143.75057983398438, 99.89656066894531, 25.474945068359375, 0.9485950469970703, 80.91911315917969, 54.064208984375, 144.97467041015625, 88.9200439453125, -79.80291748046875, 93.09613037109375, 32.31462097167969, 89.85873413085938, 107.05166625976562, -38.7926025390625, 71.76847839355469, -10.039016723632812, -76.78395080566406, 196.1273193359375, -31.2452392578125, 59.507568359375, 28.068069458007812, 20.58271026611328, 52.45219421386719, 135.84664916992188, 53.130332946777344, 46.42799377441406, 1.466552734375, 72.99568176269531, 0.2093658447265625, 197.12774658203125, 58.951934814453125, -66.93362426757812, 135.51661682128906, -6.445026397705078, 169.21759033203125, 23.05206298828125, 138.59060668945312, 67.23493957519531, 210.8029022216797, 57.41082763671875, 137.35000610351562, -13.6221923828125, 121.759033203125, 18.54473876953125, -96.83241271972656, 41.967529296875, 185.75772094726562, 17.582603454589844, -108.6270751953125, 9.878667831420898, -257.849365234375, 227.92987060546875, 56.71788024902344, 50.13006591796875, -7.06756591796875, 5.69378662109375, -60.689598083496094, 178.36758422851562, 85.18655395507812, 10.669422149658203, -12.623695373535156, 3.5003662109375, 28.870193481445312, 199.40921020507812, 203.94488525390625, 82.20693969726562, 214.51171875, 49.4774169921875, 142.52902221679688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000373.npy"}
{"epoch": 0.7811518324607329, "step": 374, "batch_size": 128, "mean": 58.535213470458984, "std": 96.27845001220703, "min": -222.86593627929688, "p10": -42.73900604248046, "median": 40.23898696899414, "p90": 193.50070343017578, "max": 301.97784423828125, "pos_frac": 0.671875, "sample": [229.74761962890625, 0.0, -0.830352783203125, 257.4930114746094, -7.434783935546875, 28.080080032348633, -35.6837158203125, 10.704277038574219, 28.0562744140625, 150.99395751953125, 137.98422241210938, -1.5893402099609375, -222.86593627929688, -39.20472717285156, 72.79511260986328, -61.161956787109375, 11.417633056640625, 46.767547607421875, 120.91558837890625, 31.87689208984375, 271.04449462890625, 45.34552001953125, 246.08322143554688, 114.8309326171875, 178.10874938964844, -47.987457275390625, 130.876220703125, 162.51145935058594, 20.454071044921875, 33.83197021484375, 71.53240966796875, 9.27056884765625, -14.81341552734375, -76.12554931640625, -40.48966979980469, 41.881744384765625, 48.887664794921875, -36.842620849609375, 165.4078369140625, 193.1730499267578, 108.27648162841797, -33.623077392578125, 248.65322875976562, -31.97998046875, 99.9237060546875, -32.19621276855469, 93.27569580078125, 47.8641357421875, -13.92193603515625, 130.60891723632812, 4.965179443359375, 42.638519287109375, -2.2646484375, -5.4612579345703125, 17.476547241210938, 67.40657043457031, 0.0, 152.12106323242188, 24.36751937866211, 32.7197265625, 70.44584655761719, -2.594451904296875, -76.13491821289062, 101.95433044433594, 254.60543823242188, 163.02001953125, 25.054119110107422, 238.45587158203125, 80.1319580078125, -7.34490966796875, 282.1917724609375, -13.530616760253906, 148.21847534179688, 29.612030029296875, 47.149932861328125, 57.503387451171875, 119.46315002441406, 89.35845947265625, 194.26522827148438, -2.3818817138671875, 98.66787719726562, 56.46806335449219, -100.7044677734375, -7.9589385986328125, -18.547000885009766, -24.45825958251953, 126.14752197265625, 160.07876586914062, -70.02398681640625, -53.859375, 74.01568603515625, 59.12361145019531, 301.97784423828125, 185.35025024414062, 99.81021118164062, 65.46161651611328, 121.55203247070312, -52.39947509765625, 237.0284881591797, 30.313079833984375, 104.0728759765625, -1.66619873046875, -6.357631683349609, 82.36178588867188, 22.98552894592285, -68.24420928955078, 38.596229553222656, -9.428421020507812, 62.93589782714844, 198.9591064453125, 189.66897583007812, 19.524551391601562, -56.39404296875, 24.25750732421875, 199.87701416015625, 164.02578735351562, -144.94039916992188, -36.40723419189453, -2.8564453125, 0.466552734375, 180.2349395751953, 174.75711059570312, 0.78448486328125, 13.52734375, 52.0316162109375, -13.199520111083984, 94.90225219726562, -105.3480224609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000374.npy"}
{"epoch": 0.7832460732984293, "step": 375, "batch_size": 128, "mean": 53.71455001831055, "std": 92.85626983642578, "min": -181.62127685546875, "p10": -56.388217163085926, "median": 33.28538131713867, "p90": 191.57371673583984, "max": 275.38848876953125, "pos_frac": 0.71875, "sample": [92.27030944824219, 89.93878173828125, 77.07527160644531, -71.74916076660156, -53.0218505859375, -49.00215148925781, 0.9676094055175781, 184.581298828125, 30.47283935546875, 196.5037078857422, 38.095130920410156, -12.251174926757812, 23.9566650390625, -75.98126220703125, -122.89396667480469, -32.882537841796875, 40.76433563232422, -3.2347412109375, -14.32244873046875, 5.078529357910156, -90.20025634765625, 190.4501953125, 129.49781799316406, 244.631591796875, 69.0684814453125, 32.77711486816406, -3.1260986328125, -92.76705932617188, 127.84083557128906, 67.89791870117188, 162.767333984375, 3.2326507568359375, 63.3714599609375, 33.79364776611328, 67.520263671875, -181.62127685546875, 85.7056884765625, 110.15286254882812, 14.685890197753906, -72.1966552734375, 194.1952667236328, 173.70260620117188, -30.033294677734375, 9.947097778320312, 105.45703125, 34.595916748046875, 128.33157348632812, -10.70306396484375, 275.38848876953125, -8.89377212524414, -11.924713134765625, 19.343006134033203, -31.558334350585938, -137.01904296875, 163.05047607421875, 38.23480224609375, -2.9470062255859375, 31.853103637695312, -9.637969970703125, 152.54241943359375, 47.356529235839844, 23.585689544677734, 0.4924278259277344, -38.80560302734375, 13.696155548095703, 9.24267578125, 148.93975830078125, 40.343021392822266, 8.07293701171875, 22.557876586914062, -64.24307250976562, 14.717010498046875, 36.419677734375, 29.62944793701172, 7.0422210693359375, -23.001197814941406, 135.6834716796875, 15.20452880859375, -43.58941650390625, -81.35540771484375, 103.2685546875, 106.7093276977539, 40.12200927734375, 217.94775390625, 120.38427734375, -5.15057373046875, 146.93524169921875, 160.6072998046875, 97.9144287109375, 140.1361083984375, 10.471244812011719, 201.34814453125, 83.09042358398438, 10.034507751464844, 225.96551513671875, -13.050048828125, 148.9457244873047, 124.4862060546875, 21.8358154296875, 4.07574462890625, -78.12371826171875, 243.01988220214844, 123.93380737304688, 144.10565185546875, -18.686050415039062, 97.84919738769531, 74.45761108398438, 18.97631072998047, 109.52120971679688, 216.6934814453125, -10.7191162109375, 269.95648193359375, 188.59552001953125, -84.7998046875, 66.79434204101562, 92.39002990722656, -30.72088623046875, 36.80810546875, 6.3335723876953125, 205.6053466796875, 110.34722900390625, 13.6090087890625, 205.83575439453125, 248.55075073242188, -7.767333984375, 63.726898193359375, 29.022674560546875, -99.69230651855469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000375.npy"}
{"epoch": 0.7853403141361257, "step": 376, "batch_size": 128, "mean": 62.018619537353516, "std": 102.46505737304688, "min": -165.757080078125, "p10": -54.84010238647459, "median": 42.25532150268555, "p90": 188.6733245849609, "max": 363.9578857421875, "pos_frac": 0.7109375, "sample": [-114.03776550292969, 182.03857421875, 197.9324951171875, 142.48873901367188, 177.6201629638672, 150.81411743164062, -21.883544921875, -19.4136962890625, 3.6042251586914062, 30.46234130859375, 55.79425048828125, 233.32920837402344, 45.90386962890625, 16.210784912109375, -122.21467590332031, 145.37945556640625, 29.529205322265625, 199.96002197265625, 193.5391845703125, 56.06336212158203, -111.48495483398438, -0.380767822265625, -11.6739501953125, 43.66143035888672, 40.849212646484375, 186.74563598632812, 133.429931640625, 3.10272216796875, -68.77145385742188, 59.0941162109375, 132.85177612304688, -35.05004119873047, 143.73333740234375, 363.9578857421875, 0.7738037109375, -16.834640502929688, 136.10894775390625, -14.752998352050781, 310.190185546875, -99.91168212890625, 125.60334777832031, -117.30166625976562, 131.3768310546875, 20.271881103515625, 87.58975219726562, 34.488525390625, 243.19378662109375, 91.56985473632812, -14.371841430664062, -1.21966552734375, 99.630615234375, 14.16217041015625, -7.360565185546875, 104.4381103515625, -120.39677429199219, 133.42230224609375, 110.91326904296875, 87.65800476074219, 20.938264846801758, -165.757080078125, 63.39361572265625, -50.16127014160156, 109.48287963867188, 18.66301727294922, 142.1866455078125, -16.844757080078125, 94.95849609375, -36.880950927734375, 0.0, 31.869728088378906, 65.94673919677734, -0.7780227661132812, 164.58541870117188, -42.31549072265625, -160.2899932861328, 193.1712646484375, 59.15350341796875, 115.59172058105469, -65.75737762451172, -12.037353515625, 10.891021728515625, 55.85877990722656, 26.329668045043945, 9.700042724609375, 148.55517578125, 28.512653350830078, 161.0676727294922, 164.95497131347656, 8.384765625, -29.595382690429688, 73.78494262695312, -2.3086318969726562, 5.904788970947266, 122.11111450195312, 129.70960998535156, 132.39459228515625, -71.28439331054688, 29.4879150390625, -35.5762939453125, 37.37940979003906, 0.7626724243164062, 289.58197021484375, -2.769500732421875, 173.2752685546875, 34.818939208984375, 176.49951171875, 99.25912475585938, 52.51173400878906, 132.49398803710938, 7.9581298828125, 304.42535400390625, 182.35198974609375, -24.883010864257812, 11.125797271728516, -127.841796875, 169.38473510742188, 198.63368225097656, 245.407470703125, -20.0653076171875, -68.17849731445312, 88.90452575683594, -3.2534255981445312, 2.7366905212402344, 114.29590606689453, 162.07505798339844, 1.994354248046875, 250.3863525390625, 148.6832275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000376.npy"}
{"epoch": 0.787434554973822, "step": 377, "batch_size": 128, "mean": 44.85930633544922, "std": 99.4041748046875, "min": -271.31658935546875, "p10": -61.331991577148436, "median": 32.32240676879883, "p90": 180.1458999633789, "max": 278.6153564453125, "pos_frac": 0.6953125, "sample": [226.942626953125, 33.848976135253906, 47.7952880859375, 7.8980255126953125, 29.770233154296875, -196.68798828125, 101.63209533691406, 112.98175048828125, 103.93023681640625, -63.689727783203125, -25.34473419189453, -10.654052734375, -99.81695556640625, -60.321533203125, 6.08966064453125, 224.13143920898438, -180.56195068359375, 180.0620880126953, 146.36605834960938, 0.0, -7.2054595947265625, 15.383514404296875, 46.60986328125, 117.26708984375, 122.18496704101562, 203.23944091796875, -8.00628662109375, -47.85935974121094, 118.41473388671875, 250.9852294921875, 143.871826171875, 159.99188232421875, 75.08685302734375, 4.2121429443359375, 30.79583740234375, 67.19052124023438, -1.7125988006591797, 2.50933837890625, -27.609207153320312, 25.39959716796875, 20.664581298828125, 9.27606201171875, -5.290336608886719, 9.01024055480957, 0.19561004638671875, 20.932388305664062, 152.404296875, -104.90242004394531, 0.2022705078125, 113.61534881591797, 72.47994995117188, -20.347145080566406, 146.4481201171875, -151.1365203857422, -36.499488830566406, 40.042327880859375, -74.86697387695312, -49.3814697265625, 43.299049377441406, 134.03524780273438, 124.99246215820312, -149.2938232421875, -12.899139404296875, 167.3121337890625, 157.55685424804688, -70.45639038085938, 50.61948776245117, -36.86236572265625, 57.70660400390625, 83.76397705078125, 43.692108154296875, -271.31658935546875, -20.2573299407959, 5.547054290771484, 117.99319458007812, -30.691307067871094, -155.84146118164062, -9.099418640136719, 70.1616439819336, 262.6566162109375, 34.20564270019531, 185.84249877929688, -5.545013427734375, -6.72174072265625, 150.19952392578125, -40.209449768066406, 183.62908935546875, 58.84800720214844, 180.34146118164062, 129.49948120117188, 143.78656005859375, 17.411483764648438, 5.402229309082031, -47.43634033203125, -167.33668518066406, 257.85125732421875, 132.56759643554688, -64.11279296875, 122.16542053222656, 19.060577392578125, 25.18975067138672, 4.6270751953125, 238.29595947265625, 49.97174072265625, 102.29354858398438, 42.67242431640625, 38.09651184082031, 49.02178955078125, 181.7574462890625, 85.2838134765625, 77.99606323242188, 25.0126953125, 17.333106994628906, -27.819580078125, -17.76319122314453, 115.37762451171875, 54.0162353515625, 112.07318115234375, 43.05378341674805, 3.5446853637695312, 45.725059509277344, 278.6153564453125, 13.1541748046875, 192.97317504882812, -25.889144897460938, 16.074630737304688, -26.4129638671875, 157.68014526367188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000377.npy"}
{"epoch": 0.7895287958115184, "step": 378, "batch_size": 128, "mean": 52.20862579345703, "std": 92.76737213134766, "min": -192.9990234375, "p10": -51.70161590576172, "median": 41.474205017089844, "p90": 192.4468017578125, "max": 347.101318359375, "pos_frac": 0.7109375, "sample": [53.59796142578125, 3.67279052734375, -17.003631591796875, 143.18826293945312, 54.442901611328125, 63.297576904296875, 90.870849609375, 113.04383850097656, -192.9990234375, 180.964599609375, 285.0274658203125, -36.89434814453125, -13.60174560546875, 129.25238037109375, 54.050537109375, 140.55828857421875, 192.17803955078125, 104.1072998046875, 32.45851135253906, 14.57354736328125, -9.9305419921875, 41.47355651855469, 19.3953857421875, 12.886688232421875, 56.62840270996094, -41.262237548828125, -51.6744384765625, 43.730308532714844, -80.48632049560547, 18.1409912109375, 209.14288330078125, 207.5999755859375, -7.10650634765625, 27.490659713745117, -33.9075927734375, 13.8603515625, -49.64597702026367, 23.373273849487305, -92.08403015136719, 137.4892578125, 84.5550537109375, 127.59909057617188, 1.273162841796875, -53.11256408691406, 14.738227844238281, 41.474853515625, 2.0186767578125, 15.89508056640625, 193.07391357421875, 61.82093048095703, -8.559906005859375, 47.35577392578125, -57.971099853515625, 178.731201171875, 9.310516357421875, -21.395919799804688, -14.068328857421875, -4.969146728515625, 127.57162475585938, 10.665069580078125, 241.08343505859375, 71.7198486328125, 101.79220581054688, 142.98770141601562, 111.51638793945312, 195.46551513671875, -10.782577514648438, 81.51292419433594, 0.0, 215.89053344726562, -30.107086181640625, 204.14520263671875, 99.85238647460938, 145.80091857910156, 247.85855102539062, 114.17071533203125, -164.69949340820312, 20.677337646484375, -87.829345703125, 347.101318359375, 6.15594482421875, 22.581573486328125, 7.433917999267578, 79.0579833984375, -51.76502990722656, -12.486114501953125, 12.16473388671875, 115.0404052734375, 44.436309814453125, -72.094482421875, 79.77166748046875, 148.15216064453125, -150.93865966796875, 75.31086730957031, 56.7938232421875, 53.1302490234375, 57.027313232421875, 112.36863708496094, 123.59475708007812, -103.68449401855469, 243.82725524902344, -61.0218505859375, -5.3827362060546875, 220.6392822265625, 49.24920654296875, 231.59371948242188, 5.98394775390625, 65.18569946289062, 136.3521728515625, -3.5906753540039062, 96.9424819946289, 70.66876220703125, -2.291748046875, -12.977996826171875, 43.38189697265625, -4.6017913818359375, 5.665771484375, 79.3267822265625, 40.517730712890625, 87.56532287597656, -1.0307769775390625, 26.3839111328125, 38.88360595703125, 32.193153381347656, 140.27252197265625, -85.3377685546875, 48.905517578125, -50.713653564453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000378.npy"}
{"epoch": 0.7916230366492146, "step": 379, "batch_size": 128, "mean": 55.76250457763672, "std": 96.50183868408203, "min": -178.98358154296875, "p10": -47.89824752807617, "median": 41.14228630065918, "p90": 181.64908752441406, "max": 256.7861328125, "pos_frac": 0.6875, "sample": [-21.993587493896484, -9.324142456054688, 54.25471496582031, 21.320777893066406, 225.3135986328125, 76.14111328125, 85.14735412597656, -24.298553466796875, 40.266475677490234, -38.901580810546875, 116.12326049804688, 114.54209899902344, -58.7498779296875, 42.018096923828125, -8.7908935546875, -37.74420166015625, 79.53353881835938, 123.098388671875, 79.31214904785156, 206.575927734375, 17.01904296875, -0.8421630859375, -60.07428741455078, 256.7861328125, 39.3780517578125, 149.6888427734375, 78.03482055664062, -27.897220611572266, 256.3399963378906, 78.15303039550781, 6.174629211425781, -162.47488403320312, 28.66753387451172, -37.06097412109375, -1.222930908203125, 60.083404541015625, 152.0370635986328, 240.30462646484375, 181.57473754882812, 119.36992645263672, 97.95370483398438, 156.65475463867188, 137.80612182617188, -46.687713623046875, 44.24896240234375, -144.554931640625, -38.6810302734375, 186.62046813964844, 15.71435546875, -35.494415283203125, -26.717914581298828, 0.0, 161.67355346679688, -178.98358154296875, -8.2208251953125, 72.39053344726562, 122.55560302734375, 68.96830749511719, -11.34197998046875, 142.4680633544922, 54.046356201171875, 17.29722785949707, 5.141632080078125, 143.3291015625, 35.001373291015625, 16.24578094482422, 90.6876220703125, 94.82058715820312, 77.70428466796875, -76.39691162109375, 20.842529296875, -36.859134674072266, -29.01921844482422, -47.239990234375, 130.44271850585938, 23.22943115234375, 177.71084594726562, 136.93569946289062, 18.54943084716797, 14.67437744140625, -33.94163513183594, 31.803436279296875, 70.7144775390625, 175.0150146484375, 181.82257080078125, 177.980224609375, 249.8475799560547, -61.210205078125, -87.89556884765625, 168.0528106689453, 93.15835571289062, -25.814315795898438, -158.3035430908203, 129.87957763671875, -138.75033569335938, -19.16773223876953, 50.405860900878906, 23.3773193359375, -8.632720947265625, 35.21104431152344, 32.89794921875, 205.68893432617188, -5.573646545410156, -147.26080322265625, 1.57757568359375, -1.9280929565429688, 35.2021484375, 119.84942626953125, 89.95101165771484, -49.434181213378906, 11.202011108398438, 169.74961853027344, 102.9635009765625, 102.2877197265625, 109.59671020507812, 237.82568359375, 37.707611083984375, 235.30355834960938, 169.23565673828125, 119.0465087890625, 243.50547790527344, 88.86949157714844, 61.603302001953125, 22.2587890625, -60.617218017578125, 123.80441284179688, -23.81268310546875, 231.14810180664062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000379.npy"}
{"epoch": 0.793717277486911, "step": 380, "batch_size": 128, "mean": 60.55318832397461, "std": 96.52681732177734, "min": -299.2680969238281, "p10": -46.861489868164064, "median": 54.427303314208984, "p90": 180.83729858398434, "max": 356.64495849609375, "pos_frac": 0.765625, "sample": [77.78121948242188, 105.86971282958984, 114.24140930175781, 66.65618896484375, 40.931640625, 86.09683227539062, 99.21835327148438, -54.170379638671875, 247.09646606445312, 5.70367431640625, 124.02350616455078, -12.399742126464844, 311.68194580078125, -27.72015380859375, -78.57687377929688, 39.25907897949219, 121.581298828125, 216.234130859375, -84.20806884765625, 23.059326171875, -11.553192138671875, -22.9647216796875, 309.7765808105469, 243.70245361328125, 3.9516868591308594, 356.64495849609375, 5.403564453125, 129.8744659423828, 105.13044738769531, 63.65114974975586, 60.529991149902344, 50.7825927734375, 23.482391357421875, 32.4818115234375, -299.2680969238281, 12.941940307617188, 105.3837890625, 64.87776184082031, 54.77919006347656, -32.303070068359375, 0.0, 111.23503112792969, 27.77526092529297, 38.71197509765625, 156.04762268066406, -13.75372314453125, 53.407684326171875, 25.319091796875, 24.7720947265625, -100.66606903076172, 187.2550048828125, 282.76251220703125, 24.67148208618164, 70.09150695800781, 17.534263610839844, 66.48684692382812, 85.2305908203125, -9.194679260253906, 102.56820678710938, 93.55307006835938, -23.514862060546875, 117.83123779296875, 105.12196350097656, 233.54937744140625, 6.459861755371094, -94.20404052734375, 137.47726440429688, 70.17190551757812, 98.32598876953125, 97.43930053710938, 264.6173095703125, -23.322845458984375, 49.502410888671875, 130.29339599609375, -17.847427368164062, 143.8975830078125, 14.4022216796875, 19.119110107421875, 254.30511474609375, 93.19120788574219, 8.396783828735352, 146.3299102783203, 144.45654296875, 41.91912078857422, 49.98893737792969, -46.29939270019531, 113.91595458984375, 92.62896728515625, 116.69674682617188, 117.62425231933594, 25.49444580078125, 193.7725830078125, 83.4697265625, 12.47491455078125, 78.44314575195312, 89.56146240234375, 40.4468994140625, -140.36410522460938, 72.2032470703125, 12.932487487792969, -75.45491790771484, -60.46099853515625, 6.20416259765625, 117.958251953125, -10.380462646484375, 20.60028076171875, 63.936248779296875, 178.08685302734375, 91.737060546875, 83.69233703613281, 82.18675231933594, 99.98046875, 124.2305908203125, -104.65597534179688, 99.65335083007812, 7.9130096435546875, 40.53443908691406, -12.768878936767578, -22.3201904296875, -22.315231323242188, -52.844879150390625, 69.50802612304688, 45.66175079345703, 54.075416564941406, -109.37651062011719, -10.732734680175781, 239.950927734375, -48.17304992675781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000380.npy"}
{"epoch": 0.7958115183246073, "step": 381, "batch_size": 128, "mean": 65.66065216064453, "std": 98.11283874511719, "min": -166.1275634765625, "p10": -44.14723358154297, "median": 49.607810974121094, "p90": 185.57477111816405, "max": 379.99102783203125, "pos_frac": 0.7265625, "sample": [92.31301879882812, 260.70660400390625, 100.25198364257812, -1.5689163208007812, -158.07626342773438, 133.72146606445312, 4.7848052978515625, 34.079063415527344, 0.0, 181.69473266601562, 2.4076004028320312, 80.83984375, -46.737037658691406, 110.68678283691406, 152.71023559570312, 97.08235168457031, 25.624053955078125, 102.58222961425781, 82.90351104736328, -63.68060302734375, -20.828590393066406, 25.21343994140625, -16.065658569335938, -97.42150115966797, 95.9775390625, -47.191978454589844, 131.02035522460938, 133.236083984375, 52.806549072265625, 204.63043212890625, 96.75970458984375, 118.90880584716797, 121.32443237304688, 27.40673828125, 172.06570434570312, 303.5943298339844, 15.38818359375, 88.55899047851562, 6.96038818359375, 14.33367919921875, 379.99102783203125, 101.9873046875, -30.214706420898438, 171.05023193359375, 84.16627502441406, 4.3282928466796875, 107.01644897460938, 145.72683715820312, -15.61956787109375, -24.097808837890625, 65.23592376708984, -20.703907012939453, 82.10720825195312, 18.265365600585938, -39.97247314453125, -12.610565185546875, 209.41845703125, -8.2845458984375, -43.93988037109375, -19.210554122924805, 32.844520568847656, 66.86784362792969, 181.26406860351562, 18.113250732421875, 154.9097137451172, 37.61149215698242, 161.36380004882812, 72.96295166015625, 202.3209228515625, 72.85421752929688, -163.41632080078125, 31.878570556640625, 131.7694091796875, 55.771728515625, 18.839927673339844, -166.1275634765625, 22.0350341796875, 184.6237030029297, 273.72412109375, 133.28759765625, -20.238067626953125, 123.30355834960938, -2.35174560546875, 41.252471923828125, -78.88375854492188, 204.5940399169922, -55.4884033203125, 36.84141540527344, 187.79393005371094, 27.987335205078125, 180.73074340820312, -85.81854248046875, 158.07138061523438, 333.750244140625, 138.60873413085938, -6.34808349609375, 83.45292663574219, 59.3760986328125, 154.239501953125, 113.4512939453125, 281.8013610839844, 29.778411865234375, 90.55233764648438, -0.47554779052734375, 0.0, 28.21923828125, 94.86029052734375, 98.93604278564453, -1.6061553955078125, -8.38287353515625, 194.399169921875, 16.390228271484375, -31.20748519897461, -33.537109375, 12.183074951171875, 36.972312927246094, -44.63105773925781, 46.07830810546875, 43.894134521484375, 283.45257568359375, 138.93063354492188, 96.10427856445312, 46.40907287597656, -67.803955078125, -50.073944091796875, 126.46517944335938, 62.754852294921875, 22.6329345703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000381.npy"}
{"epoch": 0.7979057591623037, "step": 382, "batch_size": 128, "mean": 74.41495513916016, "std": 92.46510314941406, "min": -150.7249755859375, "p10": -30.638133811950684, "median": 70.37562561035156, "p90": 201.5868713378906, "max": 299.30328369140625, "pos_frac": 0.78125, "sample": [-20.124618530273438, 57.91766357421875, 105.9827880859375, 30.956695556640625, -7.534423828125, 45.116119384765625, 121.7843017578125, 76.54793548583984, 95.18991088867188, 112.2078857421875, 185.67523193359375, -40.21615219116211, 111.45086669921875, 31.104949951171875, 299.30328369140625, 55.23585510253906, 18.954193115234375, -20.797588348388672, 148.2386474609375, 57.79254150390625, 120.2864761352539, -149.979736328125, -21.78887939453125, -26.2452392578125, 73.52545166015625, -133.179931640625, 91.10366821289062, 69.29608917236328, 170.13925170898438, 85.4820556640625, 7.855072021484375, -36.375335693359375, 158.16995239257812, 161.43963623046875, 207.68722534179688, -31.317138671875, 71.45516204833984, 37.95408630371094, -30.347131729125977, 141.87612915039062, 41.30615234375, -29.925949096679688, 151.6298065185547, -7.27716064453125, 50.309776306152344, 175.71484375, -67.42660522460938, 32.81239318847656, 169.04022216796875, 18.68499755859375, 232.81036376953125, 80.10897827148438, 74.7725830078125, 171.24517822265625, 167.32041931152344, 170.1211395263672, 38.974273681640625, 118.0032958984375, -71.43545532226562, 215.54931640625, -99.50527954101562, 129.52645874023438, -12.836891174316406, 180.0861053466797, 79.76545715332031, 168.32022094726562, 59.74652099609375, -150.7249755859375, 126.217041015625, 77.62445068359375, 169.65115356445312, 53.5860595703125, 24.27593231201172, 85.11344909667969, 40.351318359375, -55.38435363769531, 201.14401245117188, 158.77581787109375, 14.48590087890625, -25.164093017578125, 4.1031951904296875, 65.513671875, 211.70086669921875, 128.51612854003906, 4.72015380859375, -113.15628051757812, 15.445655822753906, 110.47225952148438, -7.6394500732421875, 202.62020874023438, 210.33392333984375, 6.1837158203125, 161.50433349609375, -26.766815185546875, 268.634033203125, 109.77450561523438, -22.87786865234375, 56.2814826965332, 159.63665771484375, 21.599365234375, 233.605224609375, 153.0701141357422, 79.10391235351562, 104.75393676757812, 80.46463012695312, 113.28909301757812, 74.9735107421875, 171.21160888671875, -40.2001953125, 25.282577514648438, 254.44351196289062, 61.39717102050781, -13.8077392578125, 40.4163818359375, 54.97235870361328, 96.69793701171875, -55.51609802246094, 17.336029052734375, 46.761810302734375, 4.705108642578125, 41.06767272949219, 218.09365844726562, 219.36190795898438, -4.111785888671875, 193.7566375732422, 154.86962890625, 240.5796356201172, 2.719970703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000382.npy"}
{"epoch": 0.8, "step": 383, "batch_size": 128, "mean": 65.1346664428711, "std": 96.0198974609375, "min": -179.83242797851562, "p10": -66.88296813964843, "median": 63.568695068359375, "p90": 201.67850799560546, "max": 290.32049560546875, "pos_frac": 0.7421875, "sample": [14.18182373046875, 144.8631591796875, 222.14381408691406, 156.92660522460938, 6.998023986816406, -65.21820831298828, 208.837158203125, 65.99530029296875, 61.56911849975586, -76.93499755859375, 93.75772094726562, -122.8795166015625, 92.01132202148438, -0.48687744140625, 83.33834838867188, 5.78125, -179.83242797851562, 7.896484375, 148.090087890625, 61.92808532714844, -85.87068176269531, 205.37506103515625, 89.62985229492188, 166.9932403564453, 90.70120239257812, 122.98257446289062, 111.01983642578125, 181.09011840820312, 82.8431396484375, 53.39923095703125, -0.734954833984375, -70.70088195800781, -37.00398254394531, 23.8162841796875, 0.368682861328125, 8.654998779296875, 166.07217407226562, 208.7596435546875, 107.52813720703125, 80.51103973388672, 31.0645751953125, 290.32049560546875, 165.10623168945312, 177.11363220214844, -28.56288719177246, 178.48284912109375, 54.662506103515625, -168.9185028076172, -79.00251770019531, 80.18014526367188, -18.231002807617188, 103.91751098632812, 149.7547607421875, 62.952362060546875, 229.53912353515625, 33.636566162109375, 87.46925354003906, 200.80152893066406, 64.18502807617188, 3.7087860107421875, 99.21844482421875, 76.02342987060547, 238.87713623046875, 77.09783935546875, 2.3383636474609375, 12.666213989257812, 73.75975799560547, -65.24671936035156, 74.8436508178711, 9.276924133300781, 114.22048950195312, 203.72479248046875, 30.57220458984375, 213.0196533203125, -2.0057373046875, -22.07941246032715, 8.119125366210938, 54.91413879394531, -6.704858779907227, -73.84140014648438, 195.27389526367188, 98.8251953125, 169.081787109375, 47.951438903808594, -9.375, 26.735183715820312, 70.72370910644531, -31.233795166015625, 8.424507141113281, -85.366943359375, 112.9161376953125, -26.008831024169922, 55.394317626953125, -7.225444793701172, 186.7769775390625, -134.78201293945312, 112.72046661376953, 212.3961181640625, 0.0, -74.18878173828125, -100.71351623535156, -8.103851318359375, 143.6297607421875, 168.6761474609375, 83.57942962646484, 17.016056060791016, 185.095947265625, 129.3369140625, 44.701751708984375, 116.900634765625, -4.11915397644043, 262.22705078125, -7.25079345703125, 51.412353515625, 79.20516204833984, 146.27581787109375, 0.0, 64.70578002929688, 226.42657470703125, -83.46487426757812, 252.4454345703125, 33.40419006347656, 98.32485961914062, 2.014629364013672, 113.52801513671875, -0.69110107421875, 172.8653564453125, 49.39398193359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000383.npy"}
{"epoch": 0.8020942408376963, "step": 384, "batch_size": 128, "mean": 55.873146057128906, "std": 92.20642852783203, "min": -162.8505859375, "p10": -45.38500061035156, "median": 44.18553924560547, "p90": 182.66472015380856, "max": 292.3659362792969, "pos_frac": 0.7265625, "sample": [222.07867431640625, -124.80950927734375, 81.9720458984375, -36.48529052734375, 52.37823486328125, 13.726043701171875, -47.628875732421875, 75.47547149658203, 179.3870391845703, 31.234718322753906, 231.7734375, -54.807708740234375, -16.341522216796875, 32.172325134277344, 115.97396850585938, 210.38531494140625, 277.64813232421875, 90.06193542480469, 61.87445068359375, 47.8463134765625, 5.65020751953125, 138.82257080078125, 95.59642028808594, -25.59844207763672, 87.39801025390625, -69.42520141601562, 125.8929443359375, 108.47222900390625, 120.832763671875, 29.163055419921875, -10.508218765258789, 10.7440185546875, -28.14617919921875, 170.3597412109375, 54.098541259765625, 244.53729248046875, 6.6990814208984375, 84.40798950195312, 43.11224365234375, 67.67643737792969, 31.666637420654297, 171.6895751953125, 141.8455810546875, 92.82205200195312, 2.5871334075927734, 48.07269287109375, -162.8505859375, 82.0738525390625, 136.64599609375, -7.587890625, 38.0330810546875, 20.50214385986328, -0.699493408203125, -42.91766357421875, 102.60015869140625, -34.4443359375, 167.37661743164062, 224.01083374023438, 47.2939453125, -44.42333984375, 148.08267211914062, 197.63314819335938, 29.40113067626953, 149.9868927001953, 21.447402954101562, 26.161300659179688, 36.804237365722656, -79.36720275878906, -35.4866943359375, 6.916900634765625, 21.56037139892578, 6.402305603027344, 10.817047119140625, 130.16412353515625, 55.4754638671875, 56.32975769042969, 225.81298828125, 153.13983154296875, 1.247772216796875, 10.5919189453125, 188.8179931640625, 58.38275146484375, 52.124603271484375, 151.68243408203125, -136.73028564453125, -0.9570102691650391, 14.768203735351562, 14.51486587524414, 40.80397033691406, 55.060302734375, 195.88436889648438, -27.69708251953125, 156.99844360351562, -21.272354125976562, -7.455348968505859, 26.578094482421875, 91.58961486816406, -99.95947265625, 96.92416381835938, -139.04019165039062, 194.5580596923828, 180.02760314941406, 195.15487670898438, -139.98239135742188, 35.27972412109375, -25.79302978515625, 45.25883483886719, -15.13543701171875, 26.826812744140625, -40.61669921875, 117.43963623046875, 106.50212097167969, -34.362518310546875, 5.1484222412109375, -13.896591186523438, 147.24212646484375, 129.04388427734375, -7.609832763671875, 142.6795654296875, -55.07513427734375, 155.4069061279297, 292.3659362792969, -61.70533752441406, -35.29603576660156, 90.30642700195312, 111.859375, 71.7742919921875, -67.77606201171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000384.npy"}
{"epoch": 0.8041884816753927, "step": 385, "batch_size": 128, "mean": 58.404727935791016, "std": 85.44194030761719, "min": -193.97793579101562, "p10": -32.55972213745117, "median": 47.14485168457031, "p90": 168.70003662109372, "max": 313.0983581542969, "pos_frac": 0.7421875, "sample": [-44.768218994140625, 53.7134895324707, 194.06155395507812, 95.86895751953125, 110.67748260498047, -30.704193115234375, 165.085693359375, 234.9547119140625, 117.6458740234375, 158.5582733154297, 37.12156677246094, -5.817779541015625, -193.97793579101562, 61.101318359375, 313.0983581542969, 162.34536743164062, 150.86672973632812, 88.55145263671875, -20.277618408203125, 57.800628662109375, 0.0, 60.07475280761719, 83.23825073242188, 21.112625122070312, -2.6118927001953125, 91.81010437011719, 109.5355453491211, -12.481040954589844, -18.30694580078125, -27.569137573242188, 147.7550811767578, -1.69659423828125, 29.638671875, 11.60845947265625, 249.0079345703125, 70.514892578125, 9.574066162109375, 17.2154541015625, 115.58142852783203, 233.07992553710938, 109.6143798828125, -56.7904052734375, -8.833465576171875, -46.55686950683594, 211.87417602539062, -6.868900299072266, 101.33135986328125, 17.46331787109375, -22.728286743164062, 145.97576904296875, 130.2559814453125, 72.59265899658203, -36.88928985595703, 123.27452087402344, 60.9061279296875, -56.2056884765625, 96.56048583984375, 25.263320922851562, -71.39888000488281, 121.44686126708984, 62.223175048828125, 42.88963317871094, 82.52557373046875, 121.80392456054688, 117.87298583984375, 108.07461547851562, 10.316986083984375, -12.1846923828125, 13.364471435546875, 18.288108825683594, 166.04833984375, 87.92637634277344, 87.55416870117188, 118.31048583984375, 118.48307037353516, -8.224620819091797, 25.776885986328125, 2.822385787963867, -124.78280639648438, 0.0731658935546875, 0.42047119140625, 31.787776947021484, 31.05078125, -54.269378662109375, 92.73291015625, 18.94762420654297, 28.03826904296875, 7.134883880615234, -123.71099853515625, 12.6751708984375, 66.03727722167969, 2.8430442810058594, 42.42439270019531, -43.37114715576172, -20.676712036132812, 46.406951904296875, -30.65625, 47.88275146484375, 77.04812622070312, 62.61236572265625, 120.447021484375, -79.09823608398438, 150.91995239257812, 177.33511352539062, 28.43194580078125, 64.2093505859375, 8.01824951171875, 228.63592529296875, -16.697021484375, 234.60073852539062, -49.477294921875, 241.78396606445312, 125.70054626464844, 41.8505859375, 101.69796752929688, -10.88128662109375, -9.938323974609375, 187.1763916015625, 97.20037841796875, 0.4967842102050781, 36.924591064453125, 60.000274658203125, 229.69754028320312, 56.683349609375, -13.059432983398438, 115.93048095703125, 174.8873291015625, 34.533416748046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000385.npy"}
{"epoch": 0.806282722513089, "step": 386, "batch_size": 128, "mean": 75.94027709960938, "std": 97.83843231201172, "min": -161.93426513671875, "p10": -17.82315216064453, "median": 59.90535354614258, "p90": 216.2668685913086, "max": 285.9148864746094, "pos_frac": 0.8046875, "sample": [30.9461669921875, -7.090259552001953, -3.069793701171875, 92.74803161621094, 176.24673461914062, 109.8536376953125, -17.700485229492188, 40.76513671875, 56.2901611328125, -11.621063232421875, 5.9371337890625, 250.26502990722656, 34.65309143066406, 149.2554931640625, 78.267333984375, -3.3095703125, 3.5520553588867188, 2.715240478515625, 29.418304443359375, 10.838310241699219, 71.69134521484375, -4.323265075683594, 154.5931396484375, 83.43646240234375, 153.57455444335938, 151.03741455078125, -161.93426513671875, 46.91375732421875, 191.37107849121094, -35.07972717285156, 23.809249877929688, 163.48838806152344, 111.03912353515625, 51.20953369140625, 59.12042236328125, 177.26730346679688, 20.386276245117188, 89.33218383789062, 92.34228515625, 215.67340087890625, 28.609603881835938, 181.97293090820312, 59.737518310546875, 70.13397216796875, -69.455322265625, -68.19763946533203, 217.84310913085938, 40.11358642578125, 180.77003479003906, -135.4599151611328, 210.28814697265625, 90.019775390625, 217.65162658691406, 168.15170288085938, 99.9283447265625, 285.9148864746094, 14.073577880859375, 189.4302520751953, 127.13131713867188, 5.936767578125, 33.139739990234375, 30.1846923828125, 83.35712432861328, 280.89801025390625, 28.924209594726562, 72.86831665039062, 194.70985412597656, 274.2175598144531, 275.0455322265625, 98.29208374023438, 65.87232971191406, 60.35441589355469, 60.51287841796875, -132.59132385253906, 81.32936096191406, 45.83526611328125, 39.20825958251953, 0.0, -21.926666259765625, -106.93108367919922, 110.69598388671875, 224.84442138671875, 283.8493347167969, 255.95901489257812, 60.37571716308594, 203.5445556640625, -18.109375, 13.748077392578125, 22.927215576171875, 49.77595520019531, 24.625030517578125, -19.561264038085938, 47.69830322265625, 183.86935424804688, 55.42097473144531, -9.636371612548828, -149.03192138671875, 75.40533447265625, 170.14285278320312, 11.035369873046875, 109.13838195800781, 151.380859375, 229.87191772460938, -142.81622314453125, 203.12533569335938, 28.8642578125, 39.26361083984375, 37.438323974609375, 7.3358154296875, 1.7867431640625, 92.7069091796875, 53.14404296875, 9.88385009765625, -43.67332458496094, 95.04449462890625, 125.87907409667969, -9.72845458984375, 107.3988037109375, 161.7213134765625, 83.46505737304688, 11.83929443359375, 0.0, 268.181396484375, -13.570899963378906, 270.62908935546875, 0.0, 122.59039306640625, 60.07318878173828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000386.npy"}
{"epoch": 0.8083769633507853, "step": 387, "batch_size": 128, "mean": 65.2135238647461, "std": 97.32713317871094, "min": -246.71470642089844, "p10": -44.49436035156249, "median": 56.80168533325195, "p90": 192.18179321289062, "max": 326.2629699707031, "pos_frac": 0.7734375, "sample": [52.116973876953125, 22.13648223876953, 83.96157836914062, 14.826568603515625, 177.05801391601562, 235.27059936523438, 34.22331237792969, 124.2152099609375, 31.533836364746094, -49.68548583984375, -36.24061584472656, 15.57379150390625, 53.657432556152344, 24.81208038330078, 30.972671508789062, 239.39248657226562, 162.63845825195312, -187.8759765625, 59.13397216796875, 197.09780883789062, 29.647003173828125, 18.4769287109375, 188.31661987304688, 219.423828125, 88.01985931396484, -21.479278564453125, 160.24627685546875, 2.34375, 58.84062194824219, 75.51425170898438, 144.8858642578125, 63.118675231933594, 11.818328857421875, 175.68942260742188, 143.57766723632812, -32.354278564453125, -16.03106689453125, 51.15349578857422, 108.29895782470703, -2.59375, 24.057479858398438, 43.31620788574219, 140.31629943847656, -73.57025146484375, -122.37356567382812, 6.07501220703125, -34.0716552734375, 68.33033752441406, 112.10848999023438, -49.82207489013672, 24.3199462890625, -9.929779052734375, 34.02946090698242, 54.85784912109375, -20.44549560546875, 182.2532958984375, 110.71827697753906, 214.89266967773438, 21.123470306396484, 9.835418701171875, 136.15740966796875, 70.29489135742188, 220.7279052734375, 34.765769958496094, -246.71470642089844, -42.26959228515625, 0.0, 179.006103515625, -72.953857421875, 217.0289306640625, 17.204299926757812, -7.0308837890625, 56.38874816894531, 192.69512939453125, -107.24539184570312, -34.87925720214844, 67.55584716796875, -1.5086212158203125, 129.84658813476562, 136.43890380859375, 61.33709716796875, 116.69985961914062, 185.41384887695312, -11.125961303710938, -52.020057678222656, -94.2431640625, -151.37069702148438, 20.028671264648438, 80.3563232421875, 189.60133361816406, 30.765029907226562, 57.214622497558594, -7.2874755859375, -125.92630004882812, 191.9617919921875, 238.70733642578125, 67.35223388671875, 132.6146240234375, 167.5863037109375, 326.2629699707031, 185.20852661132812, 15.076011657714844, 68.29623413085938, 49.37139892578125, 194.14088439941406, 15.19822883605957, 174.44088745117188, 173.86383056640625, 123.46488952636719, 68.8023681640625, 88.50360107421875, 29.365234375, 113.80945587158203, 64.6248550415039, -9.076873779296875, 16.48345947265625, 128.9356689453125, 224.38555908203125, 28.6121826171875, 67.06175994873047, -57.066253662109375, 15.25244140625, 254.4656982421875, 128.192138671875, 72.1761474609375, 118.23579406738281, 17.723533630371094, 120.593017578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000387.npy"}
{"epoch": 0.8104712041884817, "step": 388, "batch_size": 128, "mean": 47.06230163574219, "std": 102.1624755859375, "min": -217.04254150390625, "p10": -57.897900390625, "median": 34.75228500366211, "p90": 184.31431274414064, "max": 301.4962463378906, "pos_frac": 0.6796875, "sample": [44.09912109375, 12.7479248046875, 166.61505126953125, 0.0, 0.0, 69.65121459960938, 20.177505493164062, -26.770263671875, 17.190231323242188, -11.450927734375, 172.3897705078125, 5.600799560546875, -3.818328857421875, -4.9462890625, -3.0981826782226562, 32.7022705078125, 78.32347869873047, -140.6334228515625, 91.11679077148438, -140.35208129882812, 301.4962463378906, 59.26288604736328, 7.586097717285156, 45.284095764160156, 9.669540405273438, 151.21609497070312, 140.67715454101562, -28.266693115234375, 30.6697998046875, -19.58551025390625, 18.94249725341797, -19.66408920288086, 215.074951171875, 126.91387939453125, -17.00897216796875, 209.81272888183594, -42.3746337890625, 45.09967041015625, 57.982818603515625, 41.767723083496094, 264.54486083984375, 260.10687255859375, -99.02737426757812, -21.550201416015625, 184.494140625, -145.28591918945312, 25.828704833984375, 77.72588348388672, 216.4161376953125, 27.913480758666992, 72.71237182617188, 222.85348510742188, 2.525177001953125, 91.321044921875, 120.89959716796875, 280.8785400390625, 12.732658386230469, 36.686431884765625, 8.19342041015625, 121.47508239746094, 95.56695556640625, 70.673828125, 119.03939819335938, 65.01248168945312, -40.83134460449219, 192.7748260498047, -104.624267578125, 139.31503295898438, 78.42218017578125, 10.375511169433594, 177.43026733398438, 201.31663513183594, 10.368133544921875, -126.04110717773438, 35.36951446533203, 155.50592041015625, 150.90286254882812, 172.26104736328125, -15.4893798828125, 24.837417602539062, 135.09918212890625, 80.68948364257812, 37.326812744140625, 90.63333129882812, -56.5926513671875, 22.101577758789062, 14.40032958984375, 33.88427734375, 19.924667358398438, 111.76349639892578, -60.9434814453125, 34.13505554199219, -27.811973571777344, -36.48951721191406, 66.30258178710938, 73.75527954101562, -48.0484619140625, -14.64361572265625, -10.553207397460938, 127.311279296875, -217.04254150390625, -6.68359375, 147.71591186523438, -5.3466644287109375, 67.07879638671875, 184.23724365234375, -126.22613525390625, -71.8509521484375, -36.407249450683594, 51.454010009765625, 121.12216186523438, -45.53187561035156, 87.7108154296875, 281.1297607421875, 39.231475830078125, -185.48330688476562, -178.5950469970703, 0.04671764373779297, 110.46237182617188, 77.64654541015625, -21.189117431640625, -173.356201171875, -27.58655548095703, 249.40689086914062, 44.48223876953125, 147.29721069335938, 77.64891815185547, -47.37286376953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000388.npy"}
{"epoch": 0.812565445026178, "step": 389, "batch_size": 128, "mean": 61.58222961425781, "std": 102.47698974609375, "min": -174.39549255371094, "p10": -59.13522186279297, "median": 62.90385437011719, "p90": 196.72823638916014, "max": 322.16375732421875, "pos_frac": 0.6953125, "sample": [-79.39085388183594, -16.525344848632812, 4.059806823730469, 86.18048095703125, 119.89987182617188, -109.23927307128906, -49.259063720703125, -33.2938232421875, -36.491729736328125, 116.7591552734375, 67.54594421386719, 223.40744018554688, 95.1051025390625, 139.70822143554688, -57.91700744628906, 172.42344665527344, -72.07421875, 106.53962707519531, 123.7254638671875, 145.61460876464844, 108.87229919433594, -111.91845703125, -51.0582275390625, 162.99346923828125, 122.11196899414062, 149.25405883789062, 101.84539794921875, 322.16375732421875, 3.33380126953125, 11.27093505859375, 203.0682373046875, 199.79489135742188, 68.08416748046875, 122.4586181640625, 41.00775146484375, 47.162506103515625, -109.30211639404297, 65.09600830078125, 243.37155151367188, 151.51498413085938, -3.4787826538085938, 22.02227020263672, 105.15716552734375, 139.69451904296875, 49.0914306640625, 5.937826156616211, 190.3231201171875, 25.71905517578125, 116.28522491455078, 3.630859375, 108.10401916503906, 42.006134033203125, -93.46774291992188, 158.92071533203125, -10.06646728515625, -36.793190002441406, -14.995674133300781, 315.34625244140625, -174.39549255371094, 144.7825927734375, 66.56451416015625, 21.111785888671875, -99.02326965332031, -35.084800720214844, -46.5821533203125, 241.8839111328125, 110.97113037109375, 158.94625854492188, 311.0555419921875, 0.0, 17.384140014648438, -8.395599365234375, -51.116519927978516, 63.92669677734375, -44.59417724609375, 121.00631713867188, 91.21478271484375, 63.2650146484375, -17.0162353515625, -18.633407592773438, 83.93600463867188, 195.41395568847656, -11.63360595703125, 192.4381103515625, 85.000244140625, 141.69345092773438, 0.0, 116.1507339477539, 17.350021362304688, -7.50213623046875, 119.14788818359375, 63.290496826171875, 3.1381988525390625, 63.060646057128906, -115.3023681640625, -90.58683776855469, -9.71868896484375, 217.77264404296875, -9.890995025634766, 109.919921875, 214.81735229492188, -61.97772216796875, 215.44839477539062, 109.02490234375, 70.79954528808594, 315.52789306640625, 45.851806640625, -150.8753662109375, 32.185333251953125, 0.9058609008789062, 180.56895446777344, 185.761474609375, 23.120819091796875, 79.49609375, -19.431549072265625, 7.2020263671875, 76.24148559570312, 176.83639526367188, 62.74706268310547, -76.44819641113281, 142.89340209960938, 47.073768615722656, -30.13370132446289, -55.28681945800781, 7.1239776611328125, 27.513763427734375, 38.291015625, 214.95834350585938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000389.npy"}
{"epoch": 0.8146596858638744, "step": 390, "batch_size": 128, "mean": 63.64183807373047, "std": 93.6567153930664, "min": -139.4747314453125, "p10": -46.92082138061523, "median": 56.17015075683594, "p90": 188.37899780273435, "max": 334.92498779296875, "pos_frac": 0.7109375, "sample": [149.148681640625, -8.18182373046875, 58.484619140625, 179.666015625, 28.226608276367188, 103.4352798461914, 43.320953369140625, 23.301300048828125, -48.49486541748047, 94.41940307617188, 158.48204040527344, 144.99542236328125, 121.91824340820312, -31.038116455078125, -0.3243408203125, 85.46615600585938, 234.56674194335938, 20.797134399414062, 264.8228759765625, 121.42950439453125, -40.830078125, 232.3398895263672, 59.964500427246094, -64.53033447265625, -98.25208282470703, 96.03707885742188, 154.3074951171875, -139.4747314453125, -15.716278076171875, 48.5072021484375, 56.598480224609375, 25.155242919921875, -12.266204833984375, 37.35685729980469, 18.476808547973633, 17.054290771484375, 87.18588256835938, -4.359588623046875, -28.3895263671875, 60.55891418457031, 85.01741027832031, -26.566635131835938, 217.77279663085938, 16.836471557617188, -46.24623107910156, 56.9259033203125, 334.92498779296875, -104.40975952148438, -32.273040771484375, 173.00970458984375, 53.679779052734375, 166.03109741210938, 11.807069778442383, -1.6027984619140625, 201.43142700195312, 149.40164184570312, 3.4117431640625, 120.25543212890625, 236.49053955078125, 142.77720642089844, 160.7198486328125, 132.95675659179688, 61.671478271484375, 247.12994384765625, 186.69744873046875, 137.15567016601562, -40.303131103515625, 7.9802703857421875, -92.16839599609375, 124.46549987792969, -76.71426391601562, 120.18698120117188, 132.0723876953125, 32.674560546875, -20.6585693359375, 126.04168701171875, -23.35125732421875, 147.65182495117188, 124.92893981933594, -59.917694091796875, -33.64898681640625, 0.0, 44.61663818359375, -11.963897705078125, -15.435104370117188, 81.52667236328125, 192.3026123046875, 16.166290283203125, 29.4813232421875, -136.24603271484375, 57.748287200927734, 202.2946014404297, 52.41856384277344, 55.7418212890625, 50.09521484375, 194.155029296875, -9.363418579101562, 47.4588623046875, 131.0274658203125, 202.22549438476562, 69.54330444335938, -128.5103759765625, -5.594203948974609, -100.44094848632812, 133.66119384765625, 89.79531860351562, 162.250244140625, 71.52601623535156, 52.73646545410156, 162.59368896484375, 131.83859252929688, 3.2776565551757812, 110.46142578125, -79.85867309570312, 148.66329956054688, 201.0211944580078, 110.82426452636719, 14.63311767578125, -27.75506591796875, -15.171844482421875, 68.627197265625, 85.52685546875, 31.45843505859375, -20.397964477539062, 174.11410522460938, -63.128143310546875, 47.58546447753906, 144.21209716796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000390.npy"}
{"epoch": 0.8167539267015707, "step": 391, "batch_size": 128, "mean": 64.4569091796875, "std": 95.53313446044922, "min": -139.7337188720703, "p10": -47.6073486328125, "median": 54.94654083251953, "p90": 190.31546325683593, "max": 399.5582275390625, "pos_frac": 0.765625, "sample": [119.21554565429688, -50.91450500488281, 18.310457229614258, 63.09213638305664, 99.52935791015625, 15.403059005737305, 49.393463134765625, 74.98703002929688, 32.116050720214844, 112.5367431640625, -39.06776428222656, -19.307464599609375, -28.80826187133789, 11.324310302734375, 94.39662170410156, 76.28263854980469, 55.769805908203125, 126.0198974609375, 213.09130859375, 11.422321319580078, 59.144561767578125, 165.6427001953125, 177.30181884765625, 136.50592041015625, 62.45025634765625, 45.423828125, 103.94522094726562, 181.24517822265625, 182.50042724609375, 18.43988037109375, 137.13375854492188, 155.003662109375, 133.15467834472656, 39.098480224609375, -36.0986328125, 44.72509765625, -43.53303527832031, 1.279226303100586, -139.7337188720703, 154.85870361328125, -67.87928771972656, 86.4953384399414, 275.2197265625, 227.35281372070312, -12.816970825195312, 53.04151916503906, -54.543792724609375, 50.44825744628906, 78.66915893554688, 3.42120361328125, 18.95904541015625, 212.06182861328125, -99.93756103515625, -80.1591796875, 70.16818237304688, -69.06671142578125, 94.01934814453125, 4.362640380859375, 19.420204162597656, 79.16830444335938, 143.77154541015625, 190.75161743164062, 306.83282470703125, 63.933258056640625, -47.5477294921875, 0.43726539611816406, 28.18499755859375, 57.2650146484375, 29.88695526123047, 175.30235290527344, 109.31257629394531, 399.5582275390625, -4.1940155029296875, 108.2427978515625, 118.34918212890625, 69.68505859375, 26.621444702148438, 280.9924621582031, 133.47068786621094, -37.1640625, 46.25830078125, 54.68113708496094, 92.61193084716797, -30.92022705078125, 12.699600219726562, 10.164051055908203, -2.4240264892578125, 104.02590942382812, 190.1285400390625, -6.093475341796875, 19.88153076171875, 10.333036422729492, -62.30487060546875, -73.74456787109375, 35.6402587890625, 22.213760375976562, -45.04387283325195, 67.78370666503906, 195.37701416015625, 55.211944580078125, 139.00457763671875, 18.11138916015625, 179.8749542236328, 27.090621948242188, -16.99807357788086, -7.20074462890625, -16.029083251953125, -110.6575927734375, 112.82632446289062, -130.36477661132812, 137.682861328125, 17.67784881591797, 234.556396484375, 85.62411499023438, -47.7464599609375, 98.70060729980469, 197.1724853515625, 130.55673217773438, 20.380279541015625, 130.76632690429688, -115.44398498535156, 237.3092041015625, 170.40484619140625, 90.11544799804688, 39.11982727050781, 76.7020263671875, -35.36773681640625, 234.75921630859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000391.npy"}
{"epoch": 0.818848167539267, "step": 392, "batch_size": 128, "mean": 81.97569274902344, "std": 100.49893951416016, "min": -293.1717529296875, "p10": -22.15741271972656, "median": 68.09374237060547, "p90": 200.40761413574216, "max": 391.3342590332031, "pos_frac": 0.796875, "sample": [148.5218505859375, 177.26611328125, 232.53512573242188, 103.22335815429688, 205.73663330078125, 79.58096313476562, 95.67655944824219, 195.7021484375, -293.1717529296875, 261.0374755859375, 49.834991455078125, -132.99447631835938, 144.00648498535156, 35.399818420410156, 152.09881591796875, 198.12374877929688, 160.57864379882812, 49.62274169921875, -44.061614990234375, 185.41644287109375, 65.89215087890625, 90.13668823242188, 191.03082275390625, -19.235488891601562, 23.494861602783203, 96.11492919921875, -30.998870849609375, 31.41827392578125, 31.211097717285156, 19.652740478515625, 262.70123291015625, 13.0142822265625, 66.4390869140625, 15.380966186523438, 103.03350830078125, 174.76242065429688, 272.5113525390625, 154.3813934326172, 137.138916015625, 13.4912109375, 128.0985107421875, -66.1060791015625, -64.4417724609375, 158.7220458984375, 226.35525512695312, 47.602386474609375, 301.2486572265625, 153.33163452148438, 310.93707275390625, -8.0146484375, 128.71478271484375, 229.76007080078125, 171.16220092773438, -0.40670204162597656, 50.18748474121094, -2.6646289825439453, 95.98281860351562, 21.160675048828125, 5.9859619140625, 18.937286376953125, 391.3342590332031, -20.89117431640625, -25.111968994140625, 151.97251892089844, 150.664794921875, 186.525146484375, -50.04485321044922, 133.13461303710938, 181.46585083007812, 181.61550903320312, 23.305076599121094, 29.10235595703125, 128.77899169921875, 132.49993896484375, 0.0, -66.729248046875, 6.614105224609375, 156.0885009765625, 65.42340087890625, 33.82334899902344, 165.53326416015625, 127.40645599365234, 73.4429931640625, 229.18902587890625, 92.04974365234375, 96.85836791992188, 4.141815185546875, 138.52383422851562, 30.030899047851562, 15.47808837890625, 6.01702880859375, 0.0, 70.16754913330078, 156.43093872070312, -58.02833557128906, 132.1763916015625, 100.27857208251953, 28.60516357421875, -3.991607666015625, -20.068992614746094, -4.234710693359375, 59.69570541381836, -3.9031295776367188, 8.1878662109375, 48.91822814941406, 308.5133056640625, 9.596298217773438, 66.89846801757812, 34.48014831542969, -0.9147109985351562, -69.18034362792969, 163.870849609375, 49.58522033691406, -43.64845275878906, 6.5345306396484375, 28.263912200927734, 123.00297546386719, 34.152099609375, -57.571502685546875, 93.19332885742188, 154.72064208984375, 27.81549072265625, 148.43154907226562, 221.9852294921875, -12.829193115234375, 69.28901672363281, 154.47647094726562, 77.48928833007812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000392.npy"}
{"epoch": 0.8209424083769633, "step": 393, "batch_size": 128, "mean": 61.667999267578125, "std": 95.52548217773438, "min": -216.82232666015625, "p10": -41.77696075439453, "median": 50.362810134887695, "p90": 173.37507324218745, "max": 327.9378967285156, "pos_frac": 0.7109375, "sample": [-1.3721981048583984, 32.453529357910156, -3.762054443359375, 22.282135009765625, -23.849639892578125, 151.54547119140625, 103.38433837890625, 86.88640594482422, -46.43155288696289, 10.455951690673828, 71.82830047607422, 135.75885009765625, -69.94021606445312, 133.81997680664062, 301.0377197265625, 45.576759338378906, 45.11003875732422, 142.62417602539062, 78.90782165527344, 143.9974365234375, -6.577606201171875, -53.91326904296875, 134.44436645507812, -216.82232666015625, 22.880674362182617, 10.358497619628906, 181.06524658203125, -78.88516235351562, 8.789657592773438, 12.080352783203125, -11.410171508789062, -12.133956909179688, 88.2264175415039, -74.19940185546875, 122.1072998046875, 141.3062744140625, 12.123031616210938, 147.40206909179688, 11.897613525390625, 132.89895629882812, 155.33636474609375, 58.286376953125, 78.41177368164062, 107.64480590820312, 1.4131011962890625, 40.9036865234375, 128.7847442626953, 38.65655517578125, -94.8802490234375, 16.5291748046875, -31.88543701171875, 47.44716262817383, -16.558536529541016, 85.36105346679688, 64.63623046875, -6.1739959716796875, 90.48479461669922, 29.415367126464844, 196.4345703125, 3.219329833984375, 170.07928466796875, -40.55595397949219, -79.44754791259766, -32.14094543457031, 159.81558227539062, 13.29925537109375, 39.413330078125, -53.287109375, 66.631591796875, -33.25721740722656, 133.6627197265625, 93.60562133789062, -89.1197509765625, 239.99404907226562, 164.51992797851562, 107.68833923339844, 2.6677303314208984, -2.6829986572265625, 166.8230743408203, -24.295326232910156, 14.856338500976562, 327.9378967285156, 40.03730010986328, 63.183624267578125, 216.36639404296875, 70.14179992675781, 53.27845764160156, 0.0, 151.672119140625, 157.2953643798828, -14.510467529296875, 281.5093994140625, 69.28649139404297, 25.660003662109375, 128.43215942382812, 139.33673095703125, 59.28688049316406, -6.054206848144531, 136.91810607910156, 46.988800048828125, 58.70294189453125, 119.8768310546875, -36.75267028808594, -27.78466796875, 104.00015258789062, 85.56353759765625, 242.322509765625, -7.69451904296875, -183.1070556640625, 46.8582763671875, 79.97401428222656, -23.247650146484375, 294.601318359375, -44.6259765625, 206.29318237304688, -37.607479095458984, -71.01142883300781, 73.86123657226562, 192.32940673828125, 22.3560791015625, 148.45523071289062, 213.66285705566406, 260.30810546875, 69.16116333007812, -15.782569885253906, -18.80462646484375, 100.35185241699219, 150.78854370117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000393.npy"}
{"epoch": 0.8230366492146597, "step": 394, "batch_size": 128, "mean": 53.996795654296875, "std": 102.03178405761719, "min": -135.8091583251953, "p10": -66.43624877929688, "median": 30.680070877075195, "p90": 199.1441955566406, "max": 315.6092529296875, "pos_frac": 0.6484375, "sample": [129.06143188476562, -30.380126953125, 155.15826416015625, 70.5916748046875, -29.115066528320312, -66.43373107910156, 315.6092529296875, 16.745025634765625, 197.01950073242188, -31.70305633544922, 130.1119384765625, -34.72181701660156, 251.733642578125, 3.646728515625, -70.68563842773438, 9.445404052734375, 192.07656860351562, 37.458457946777344, -66.44212341308594, 100.841064453125, -9.0819091796875, 59.03778076171875, 12.406890869140625, 88.70599365234375, 40.262535095214844, -87.12164306640625, 167.02706909179688, -13.61529541015625, 137.27908325195312, 212.17605590820312, 203.17205810546875, -80.53594970703125, 240.2720947265625, 212.71652221679688, -52.652587890625, -27.122940063476562, -10.600814819335938, 118.01077270507812, -48.709686279296875, 59.92906188964844, 0.52349853515625, 195.94720458984375, 247.46743774414062, 10.833370208740234, 107.97158813476562, -43.782958984375, -22.0504150390625, 219.02838134765625, -18.822181701660156, 49.510528564453125, 88.81185913085938, 271.0247802734375, -79.22190856933594, 208.98703002929688, 32.955291748046875, -97.75887298583984, -134.77633666992188, 88.12982177734375, 64.12548828125, 107.29127502441406, 137.09814453125, -2.61334228515625, 11.282379150390625, 96.95294189453125, 8.2694091796875, 128.83221435546875, 71.547607421875, -10.290283203125, 156.74395751953125, 150.713623046875, 72.81967163085938, -98.47123718261719, 1.1051273345947266, -36.21241760253906, 28.4178466796875, 71.49533081054688, -11.10791015625, -3.735595703125, 30.331069946289062, 25.231414794921875, 272.5731201171875, 18.272109985351562, 259.7183837890625, -35.3770751953125, 135.17922973632812, 308.3228759765625, -89.00347900390625, 67.1727294921875, 89.89653015136719, 98.73095703125, -16.182891845703125, -20.737201690673828, 13.61993408203125, 172.18374633789062, 9.18206787109375, 120.53872680664062, 36.77117919921875, 39.336761474609375, -6.92974853515625, -31.42523193359375, 41.533084869384766, 131.7342529296875, 55.0362548828125, 31.029071807861328, 149.49755859375, 27.58306884765625, -54.018829345703125, 0.0, 136.08602905273438, -61.09808349609375, -13.574142456054688, 14.198066711425781, -102.83160400390625, -53.203887939453125, -1.3141403198242188, 195.07666015625, 109.47386169433594, 197.41796875, 139.567626953125, 91.05307006835938, 9.737457275390625, 7.6803436279296875, -62.341949462890625, -2.3270721435546875, -135.8091583251953, -105.56596374511719, -94.86776733398438, 193.81768798828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000394.npy"}
{"epoch": 0.8251308900523561, "step": 395, "batch_size": 128, "mean": 70.88543701171875, "std": 97.40445709228516, "min": -135.21188354492188, "p10": -26.040043640136716, "median": 49.56101989746094, "p90": 210.0480499267578, "max": 347.733642578125, "pos_frac": 0.765625, "sample": [92.607177734375, 173.5935516357422, -11.229377746582031, -34.997230529785156, -22.01043701171875, -55.68708801269531, -14.299087524414062, 85.82577514648438, 45.5572509765625, -11.606781005859375, 56.4249267578125, 18.386810302734375, 129.7113800048828, 170.24014282226562, 172.56610107421875, -135.21188354492188, 152.777587890625, -14.82098388671875, 23.526336669921875, 12.169990539550781, 170.744384765625, 224.88864135742188, 122.84368896484375, 255.46151733398438, 52.92510986328125, 174.07101440429688, 303.2509765625, 208.91421508789062, 269.84417724609375, -117.73382568359375, -26.872222900390625, -35.07916259765625, 54.471099853515625, -6.78765869140625, 11.03607177734375, 212.69366455078125, 148.37686157226562, 84.2489013671875, 9.821403503417969, 38.86757278442383, 20.091217041015625, 233.27606201171875, -1.9537734985351562, 12.401351928710938, 34.420806884765625, 148.22169494628906, 266.95721435546875, 16.742645263671875, 13.22625732421875, -65.44349670410156, -38.331817626953125, 20.85112762451172, -15.301727294921875, 97.4332275390625, 30.73455810546875, 91.42683410644531, 134.70855712890625, 42.97686767578125, 25.976913452148438, -108.19464111328125, 58.6507568359375, 66.12801361083984, 126.79443359375, 279.3961181640625, 342.11846923828125, 94.09878540039062, 21.218292236328125, 86.15090942382812, 72.1268310546875, 8.946231842041016, -87.83419799804688, 64.44638061523438, 201.22164916992188, 73.9375, 15.277801513671875, 3.0883750915527344, 146.298828125, 170.56944274902344, 12.69805908203125, 41.607269287109375, -25.57787322998047, -80.77413940429688, 34.680450439453125, 171.07473754882812, 143.91473388671875, 15.776321411132812, 257.5168762207031, 28.175201416015625, 64.80816650390625, 187.3382110595703, 25.129642486572266, 54.78337097167969, 347.733642578125, 217.60939025878906, 48.9404296875, 6.342620849609375, 50.181610107421875, -24.552276611328125, -2.7909698486328125, 72.0948715209961, 77.358642578125, 25.386886596679688, -5.5610504150390625, -21.927974700927734, 139.26495361328125, 290.6477966308594, 129.69024658203125, 54.77593994140625, 25.479934692382812, 73.20515441894531, 160.67845153808594, 24.692413330078125, 99.5787353515625, 25.55560302734375, -0.395965576171875, 82.27099609375, 121.96339416503906, -25.683395385742188, 87.55438232421875, 198.55426025390625, -34.71263122558594, -8.047689437866211, -45.99102783203125, 13.160232543945312, -15.345458984375, 155.16668701171875, 17.526229858398438, 89.41552734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000395.npy"}
{"epoch": 0.8272251308900523, "step": 396, "batch_size": 128, "mean": 63.47413635253906, "std": 94.09130859375, "min": -134.732421875, "p10": -63.6904426574707, "median": 62.421775817871094, "p90": 178.65790100097655, "max": 295.3699951171875, "pos_frac": 0.6953125, "sample": [158.962158203125, 55.900360107421875, -10.25537109375, 260.4854736328125, 101.11270141601562, 177.47268676757812, -54.53727722167969, 62.27378845214844, 172.66470336914062, -40.00506591796875, 25.650840759277344, 135.2143096923828, 7.7449951171875, 53.29437255859375, 128.31027221679688, 228.41622924804688, 85.15390014648438, -70.03424072265625, 151.8795166015625, -110.44461059570312, 166.10992431640625, 111.20176696777344, -9.316375732421875, 226.9150390625, 69.57769775390625, 132.34664916992188, 16.094970703125, 129.93365478515625, 175.89926147460938, 219.9000244140625, 37.81471252441406, 120.634033203125, 152.53924560546875, -65.84829711914062, -7.3719024658203125, 41.82111358642578, 48.86614990234375, -11.885040283203125, -62.765647888183594, 76.11194610595703, -36.86791229248047, 20.650279998779297, -9.8538818359375, 295.3699951171875, 143.19345092773438, 30.893768310546875, 52.13677215576172, 108.36776733398438, 133.82655334472656, -19.320358276367188, 159.23455810546875, -27.126983642578125, 184.39813232421875, 171.78726196289062, 31.255615234375, -127.41995239257812, 72.0404052734375, 124.58676147460938, 26.40576934814453, 103.19049072265625, 0.0, 132.6913604736328, 165.01239013671875, -41.324188232421875, -116.60543823242188, 215.98782348632812, 107.48960876464844, -100.79985046386719, 45.763916015625, 120.77824401855469, 135.04034423828125, 167.0843963623047, -98.98054504394531, -18.037689208984375, 108.65557861328125, 116.37246704101562, 144.5860137939453, -44.77699279785156, 50.1923828125, -39.84648895263672, 39.11682891845703, 81.20796966552734, 109.62332153320312, 81.90069580078125, 84.89297485351562, 140.2152099609375, -77.69989013671875, 228.2900390625, -3.8029537200927734, 206.78201293945312, 62.56976318359375, 288.4080810546875, 14.45831298828125, -66.6954345703125, 59.987457275390625, 204.37020874023438, 3.9330902099609375, 117.13394165039062, -27.30036163330078, 13.6243896484375, 181.42340087890625, 23.44403076171875, -13.463275909423828, 69.85897827148438, 67.43743896484375, 23.78681182861328, 66.02386474609375, -5.489326477050781, 109.53474426269531, -36.206573486328125, -43.060791015625, -25.419189453125, 119.41729736328125, 111.6737060546875, -5.65863037109375, -134.732421875, 181.84283447265625, 97.78054809570312, 0.0, 126.62252807617188, -79.22665405273438, -0.41767120361328125, 24.22658920288086, 25.313446044921875, 151.26153564453125, -92.77383422851562, -68.18367004394531, 114.78775024414062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000396.npy"}
{"epoch": 0.8293193717277487, "step": 397, "batch_size": 128, "mean": 55.47074890136719, "std": 103.62250518798828, "min": -168.84890747070312, "p10": -77.28587646484374, "median": 41.00825500488281, "p90": 188.4715148925781, "max": 398.64007568359375, "pos_frac": 0.6796875, "sample": [-1.98199462890625, 11.134735107421875, -3.0167007446289062, 82.93529510498047, 199.2285614013672, -37.34814453125, 18.23211669921875, 7.328422546386719, 54.84236145019531, 153.59637451171875, 188.35247802734375, 92.44286346435547, 143.129638671875, -85.52337646484375, 398.64007568359375, -87.6484375, 19.165283203125, 34.53733444213867, 161.11691284179688, 1.7896270751953125, 140.0835418701172, 21.78473663330078, 177.03335571289062, 39.12298583984375, 182.95223999023438, 116.87806701660156, 301.0128479003906, 204.60140991210938, 85.40396118164062, 17.6712646484375, 73.50698852539062, 39.81109619140625, 188.749267578125, -1.1324310302734375, -25.68494987487793, 72.17135620117188, 22.543670654296875, 122.85966491699219, -96.1671142578125, 41.588287353515625, -168.84890747070312, 71.92088317871094, -12.158721923828125, 90.19671630859375, 156.78558349609375, -156.76730346679688, -46.22589111328125, -52.54718017578125, -16.47296142578125, -2.1680755615234375, 225.1186065673828, 218.63751220703125, 21.98077392578125, -82.5438232421875, 73.75042724609375, 16.86209487915039, 44.446807861328125, -8.2518310546875, 130.05467224121094, -27.08184814453125, -115.21441650390625, -5.920867919921875, 142.93548583984375, 8.336158752441406, -17.77320098876953, 74.6494140625, -34.97691345214844, 42.24591064453125, 149.82061767578125, 40.42822265625, 183.070068359375, 31.000213623046875, 46.300628662109375, 108.39364624023438, 0.0, -5.18865966796875, 88.65695190429688, -158.09967041015625, 8.049983978271484, -105.76690673828125, 146.237548828125, 171.15243530273438, 123.7370376586914, 116.09283447265625, 256.1241760253906, 169.01754760742188, 21.48711395263672, -57.310577392578125, 12.662368774414062, 141.46697998046875, 102.6844482421875, -12.108184814453125, -55.447265625, -120.566162109375, 139.92941284179688, 105.04927825927734, -45.844970703125, 33.70782470703125, 71.96748352050781, 62.90666198730469, -43.04571533203125, 39.7161865234375, 90.08062744140625, 257.559326171875, 195.42593383789062, 67.44903564453125, 0.0, -75.032470703125, -85.62957763671875, 190.85293579101562, 266.33135986328125, 129.4250946044922, 178.5306396484375, 54.8209228515625, -125.524658203125, 49.602630615234375, -10.484817504882812, -72.24325561523438, -120.99443054199219, 177.524658203125, 63.75733947753906, 18.432899475097656, 192.24078369140625, 121.63299560546875, -11.81768798828125, -73.58233642578125, 31.73394775390625, 177.1697540283203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000397.npy"}
{"epoch": 0.831413612565445, "step": 398, "batch_size": 128, "mean": 50.66444396972656, "std": 115.74591064453125, "min": -472.8895263671875, "p10": -85.30782165527344, "median": 45.75074768066406, "p90": 187.45789031982423, "max": 303.305908203125, "pos_frac": 0.703125, "sample": [88.33160400390625, 123.12164306640625, 157.61058044433594, 3.709747314453125, 129.0794677734375, -205.73687744140625, -129.49734497070312, 41.30940246582031, 127.41549682617188, -3.0296478271484375, 233.69122314453125, 174.11660766601562, 37.907196044921875, 7.5118408203125, 1.93292236328125, 45.970916748046875, 111.01300048828125, 10.995010375976562, 18.142314910888672, -59.92919921875, 169.3109130859375, -33.079986572265625, -23.593170166015625, 103.1558837890625, 212.63555908203125, 173.094970703125, -7.5530853271484375, 121.31983947753906, 162.01141357421875, -52.00457763671875, 134.23016357421875, 41.1610107421875, 303.305908203125, -29.9237060546875, 182.133056640625, 82.19717407226562, 67.5418701171875, 152.68418884277344, 157.1732177734375, 30.591144561767578, 187.75729370117188, -29.093978881835938, 178.6356201171875, 178.98399353027344, -0.38573455810546875, 34.20823669433594, 93.4880142211914, 85.57183837890625, 91.96067810058594, 178.09048461914062, 194.95091247558594, 215.89208984375, -86.078125, -45.142425537109375, 159.83319091796875, 78.28755187988281, 29.156585693359375, 40.84619140625, -136.1497344970703, 83.34805297851562, -29.73663330078125, -129.1783447265625, 21.513099670410156, 177.54425048828125, 70.00279235839844, 234.98703002929688, -79.72232055664062, -71.94607543945312, 1.6362533569335938, 14.0618896484375, 26.296951293945312, 140.62759399414062, 83.42010498046875, 121.66558837890625, 69.80040740966797, 119.14883422851562, 0.2978534698486328, -121.12149047851562, -48.92842102050781, 12.010732650756836, 0.937103271484375, 222.02017211914062, -84.97769165039062, -87.85208129882812, 45.53057861328125, -48.00731658935547, 187.32957458496094, 200.91976928710938, -2.6673583984375, -185.36297607421875, 183.66220092773438, 192.4586181640625, 110.0535888671875, 12.79949951171875, 19.833778381347656, 149.7951202392578, -198.3545379638672, 62.259429931640625, 7.025993347167969, 49.64244079589844, -11.001785278320312, -160.53897094726562, 44.72248840332031, 130.88656616210938, 130.12286376953125, 53.64256286621094, -28.07891845703125, 73.73046875, 46.01654052734375, 74.35025024414062, 254.421630859375, -121.8270263671875, 21.550861358642578, 183.01141357421875, -108.43194580078125, -65.41252136230469, 215.5004425048828, -9.033966064453125, -472.8895263671875, 262.038330078125, -13.836273193359375, -54.561767578125, -54.1236572265625, -59.69244384765625, 122.56448364257812, 68.29725646972656, 115.99044036865234, 3.986236572265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000398.npy"}
{"epoch": 0.8335078534031414, "step": 399, "batch_size": 128, "mean": 60.86689376831055, "std": 96.44477844238281, "min": -161.67782592773438, "p10": -65.8908935546875, "median": 56.5233039855957, "p90": 199.65260162353513, "max": 282.36090087890625, "pos_frac": 0.6875, "sample": [-31.949050903320312, 215.24188232421875, 89.79681396484375, 133.42401123046875, -76.7533950805664, 170.7474365234375, 18.784988403320312, -4.2084503173828125, 42.989288330078125, 34.97161865234375, 139.0522003173828, 64.36190795898438, 133.54388427734375, 98.06201171875, -87.46661376953125, 71.27403259277344, 57.389808654785156, -19.158870697021484, 238.34039306640625, 100.28022766113281, -89.42581939697266, -14.738288879394531, 211.57554626464844, -0.6803665161132812, 58.10845947265625, 169.87274169921875, 106.1722412109375, 142.81268310546875, -78.9822998046875, -8.725753784179688, 17.692907333374023, 68.73735809326172, 37.65460205078125, -39.235469818115234, 98.82350158691406, 47.98968505859375, 130.62255859375, -68.16300964355469, -18.804183959960938, 218.620361328125, -43.177642822265625, -42.314208984375, -42.3162841796875, 113.6639404296875, 57.48548889160156, 112.8953857421875, 243.04296875, -15.250274658203125, 120.26057434082031, 110.05523681640625, 268.3939514160156, 61.130462646484375, 119.81805419921875, 64.80548095703125, 128.57369995117188, -71.0584716796875, 136.9776611328125, 241.70819091796875, 52.806549072265625, -20.968917846679688, -2.790435791015625, 142.4627685546875, -2.6603546142578125, 72.95831298828125, 79.96038818359375, 234.78244018554688, 164.34921264648438, -36.37904357910156, -11.26959228515625, 156.04855346679688, 205.8309326171875, 208.37571716308594, 25.76068115234375, -29.98089599609375, 15.2392578125, -99.08641052246094, 20.931249618530273, -0.37664794921875, -5.213981628417969, -34.13193893432617, -31.096481323242188, -10.604339599609375, -138.47027587890625, -68.71186828613281, 55.65679931640625, 197.00474548339844, -64.91712951660156, 134.69268798828125, 122.04766845703125, 36.2635498046875, 147.83248901367188, 110.035400390625, 43.58038330078125, 27.6207275390625, 26.679092407226562, -11.663167953491211, -161.67782592773438, 190.52325439453125, 19.313859939575195, 282.36090087890625, 10.865478515625, 37.614402770996094, 141.60971069335938, -88.5478515625, -148.00949096679688, 107.01844787597656, 117.20074462890625, 168.19744873046875, 15.256561279296875, 224.4180908203125, 54.69017028808594, 21.418472290039062, -1.4290771484375, 35.62156677246094, 169.40899658203125, 163.18756103515625, 102.45391845703125, 62.1990966796875, 248.17169189453125, -118.61798095703125, 5.438018798828125, 129.459716796875, 161.07437133789062, 66.44416809082031, -47.05889892578125, 35.111602783203125, 70.42794799804688, 60.87330627441406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000399.npy"}
{"epoch": 0.8356020942408376, "step": 400, "batch_size": 128, "mean": 60.30166244506836, "std": 100.24059295654297, "min": -166.73509216308594, "p10": -74.59018554687499, "median": 55.15242004394531, "p90": 189.34897308349608, "max": 296.1493835449219, "pos_frac": 0.7109375, "sample": [27.793060302734375, -21.046798706054688, -24.110870361328125, 45.720947265625, -51.174224853515625, 125.62727355957031, 48.87178039550781, 84.470703125, 154.31915283203125, 101.24539184570312, 88.89585876464844, 98.18206787109375, 90.034423828125, -19.530792236328125, 46.91748046875, 172.28648376464844, 32.4356689453125, -118.18008422851562, 56.531402587890625, 36.53358459472656, 17.669830322265625, 195.79364013671875, -6.135141372680664, -100.2174072265625, -21.151535034179688, 174.018798828125, 82.43557739257812, 34.53607177734375, 85.76309204101562, -48.28912353515625, 157.1441650390625, 84.13357543945312, 101.016357421875, 87.62550354003906, 63.61158752441406, 78.55418395996094, 191.02667236328125, 219.67547607421875, 8.067230224609375, 123.2735824584961, 180.2359619140625, -50.11235046386719, -7.974241256713867, 25.873504638671875, 126.37071228027344, -79.02728271484375, 52.27642822265625, 2.8228607177734375, 212.02218627929688, 162.97027587890625, 145.70306396484375, 27.178749084472656, 75.179443359375, 188.6299591064453, 129.41580200195312, 62.22821044921875, -19.315284729003906, 187.05426025390625, 148.87606811523438, -120.08663940429688, 147.59231567382812, -89.51324462890625, 244.14794921875, 229.87899780273438, 4.29107666015625, 36.824562072753906, 62.79810333251953, 157.56317138671875, -166.73509216308594, 296.1493835449219, 141.52896118164062, 258.9033203125, 153.10385131835938, 274.8188781738281, -155.46282958984375, 55.9796142578125, 54.04866409301758, 42.64862060546875, 218.95822143554688, -49.38409423828125, -151.13023376464844, -8.788589477539062, -27.910568237304688, -130.45263671875, -2.6121826171875, 17.8177490234375, 175.598876953125, 7.616100311279297, 117.03369140625, 75.6478042602539, 47.47270584106445, -12.9437255859375, 170.20223999023438, 85.53021240234375, 0.0, -92.86163330078125, -139.75686645507812, -12.90155029296875, 168.80563354492188, 93.431884765625, 28.2767333984375, 235.12277221679688, 38.99310302734375, 54.325225830078125, -16.21270751953125, 130.62713623046875, 51.53221130371094, 17.181488037109375, -6.93391227722168, 67.20132446289062, 93.722900390625, 157.15151977539062, 207.53878784179688, 236.0631561279297, 81.60578918457031, 11.9207763671875, -75.63552856445312, 78.97015380859375, 148.8304443359375, -46.20330810546875, -43.38325500488281, -74.14218139648438, 5.66534423828125, 77.38690185546875, -72.22781372070312, -1.836395263671875, -79.70394897460938, 160.14022827148438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000400.npy"}
{"epoch": 0.837696335078534, "step": 401, "batch_size": 128, "mean": 65.87191772460938, "std": 94.57711029052734, "min": -189.14646911621094, "p10": -33.80690231323241, "median": 62.267051696777344, "p90": 175.45534057617186, "max": 369.8738098144531, "pos_frac": 0.765625, "sample": [-10.851280212402344, -19.122344970703125, 179.19357299804688, -70.651611328125, 140.9807586669922, 78.22314453125, 17.41827392578125, 31.597259521484375, 0.668212890625, 79.77330017089844, -28.144439697265625, 124.61238098144531, 152.43032836914062, 200.701904296875, 10.67547607421875, 136.26962280273438, 21.02252197265625, 122.70152282714844, -78.02011108398438, 45.79051971435547, -74.98580932617188, 142.9368896484375, 265.7816162109375, 37.70074462890625, 66.840087890625, -64.55119323730469, 174.43817138671875, 110.4654541015625, -27.676116943359375, 1.6772880554199219, 54.643836975097656, 3.9720306396484375, 49.5599365234375, 154.26766967773438, 62.6287841796875, 90.18756103515625, 160.3257598876953, 180.76119995117188, 60.36541748046875, 121.88304901123047, -8.800712585449219, 95.33535766601562, -32.38542938232422, -29.1033935546875, 73.15133666992188, 105.74897003173828, -162.91885375976562, 165.89031982421875, 80.35557556152344, -0.594207763671875, -25.61819076538086, -50.018341064453125, 6.63116455078125, 24.890899658203125, 215.96775817871094, 74.69542694091797, 115.97698974609375, 107.218017578125, 9.989471435546875, 4.273193359375, 92.8670654296875, 83.23892974853516, 61.90531921386719, -80.5956039428711, -21.971832275390625, -19.586288452148438, 127.89370727539062, 87.83236694335938, 295.60400390625, 79.86749267578125, 118.24478149414062, 95.38372802734375, -177.06350708007812, 267.0173645019531, 171.777099609375, 10.384672164916992, 16.19696044921875, 100.10906982421875, 58.57093811035156, 8.129112243652344, 153.86859130859375, 23.61322021484375, 54.981201171875, 39.186737060546875, 134.16641235351562, 36.16023254394531, -19.03387451171875, 33.51579284667969, 71.07684326171875, 111.61962890625, 96.29739379882812, 79.77085876464844, 126.6243896484375, 138.865478515625, 177.8287353515625, -5.6866607666015625, 86.00299835205078, 49.29534912109375, 35.04327392578125, 21.276885986328125, 165.13723754882812, 79.13858032226562, -49.425086975097656, -9.380783081054688, 113.2503662109375, 41.421905517578125, 126.05657958984375, 297.914794921875, -37.12367248535156, 119.87289428710938, 125.02313232421875, 109.949462890625, 200.26968383789062, 337.642822265625, 82.54049682617188, 25.236515045166016, -12.71420669555664, 178.37701416015625, 22.783721923828125, 7.66412353515625, -8.34161376953125, -58.12255859375, -4.513542175292969, 13.600479125976562, 369.8738098144531, -189.14646911621094, -60.21088409423828, 147.37457275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000401.npy"}
{"epoch": 0.8397905759162304, "step": 402, "batch_size": 128, "mean": 62.06712341308594, "std": 95.4703369140625, "min": -170.209228515625, "p10": -38.019799804687494, "median": 52.753482818603516, "p90": 180.437451171875, "max": 354.4473876953125, "pos_frac": 0.75, "sample": [-46.28419876098633, -31.376461029052734, -22.529510498046875, 55.54571533203125, 22.303421020507812, 191.2992401123047, 17.50335693359375, 0.0, 28.369827270507812, -10.250137329101562, 61.27313232421875, -35.934814453125, 187.91680908203125, 111.72976684570312, 38.445068359375, -57.360816955566406, 119.28913879394531, -27.622314453125, 2.2991943359375, 68.6258773803711, 14.295024871826172, 180.4183349609375, 6.9925537109375, 171.58895874023438, 227.8367919921875, 136.27291870117188, 27.99383544921875, 167.0184783935547, 56.03875732421875, 229.13095092773438, -29.50018310546875, 17.65374755859375, 143.94845581054688, -42.884765625, -71.99676513671875, -126.79568481445312, 354.4473876953125, 87.67678833007812, -62.78399658203125, 180.4820556640625, -5.7894134521484375, 4.8603668212890625, 120.74757385253906, 189.26763916015625, -21.250411987304688, 116.90069580078125, 97.60018920898438, 39.13725280761719, 52.99250030517578, -97.1815185546875, 115.09835815429688, 135.07618713378906, 167.44671630859375, 79.87959289550781, 44.7816162109375, 27.770355224609375, -30.892913818359375, 74.471435546875, 220.54888916015625, 68.427978515625, -4.739898681640625, 173.94882202148438, 124.06365966796875, 15.217803955078125, 94.82351684570312, -114.65455627441406, 9.46636962890625, 264.30548095703125, 79.02011108398438, 52.51446533203125, 105.19343566894531, 13.769134521484375, -164.2393035888672, 8.665651321411133, 33.05963134765625, 250.56857299804688, 164.2133331298828, 90.23466491699219, 6.412139892578125, 113.43647766113281, 68.26057434082031, -31.1829833984375, 86.83203887939453, -18.137664794921875, 347.15557861328125, -60.399993896484375, 83.55709838867188, 17.86700439453125, -7.2950286865234375, 173.435546875, 31.525062561035156, 76.23809814453125, 63.0811767578125, 64.98239135742188, 4.034595489501953, -19.60089111328125, 53.017364501953125, -13.543914794921875, -6.570716857910156, 133.46661376953125, 27.270523071289062, 170.32119750976562, -45.30339050292969, -50.79594421386719, -170.209228515625, 50.8804931640625, 100.09375, 2.665252685546875, 16.899707794189453, 332.138916015625, 174.44302368164062, 23.362930297851562, 238.37249755859375, -0.6487579345703125, 79.30059814453125, 112.401611328125, 28.1221923828125, 53.68280029296875, 60.8929443359375, 147.00363159179688, 77.33273315429688, 19.88532257080078, 55.1109619140625, 150.24920654296875, 18.18115234375, -1.180389404296875, 160.85057067871094, 10.297475814819336], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000402.npy"}
{"epoch": 0.8418848167539267, "step": 403, "batch_size": 128, "mean": 81.18905639648438, "std": 95.19469451904297, "min": -151.6762237548828, "p10": -22.249119949340816, "median": 64.0282974243164, "p90": 203.36986999511717, "max": 404.5921630859375, "pos_frac": 0.8046875, "sample": [18.230514526367188, 114.61027526855469, 35.80364990234375, 151.75985717773438, 1.9437942504882812, 38.168155670166016, 56.19042205810547, 69.17798614501953, 27.841110229492188, 206.15451049804688, 51.798675537109375, -29.298561096191406, 228.17794799804688, 206.654052734375, 44.07049560546875, 34.95408630371094, 130.49847412109375, 35.10187530517578, 164.07952880859375, -41.99634552001953, 42.76434326171875, 144.9708709716797, 167.2803192138672, 66.16981506347656, 51.858314514160156, 94.1483154296875, 258.8200378417969, 129.63107299804688, 82.594970703125, 39.466941833496094, -121.08026123046875, 226.88180541992188, 121.594970703125, 125.4449462890625, 115.51065063476562, -21.309764862060547, 32.94024658203125, 123.32891845703125, 0.0, -2.18548583984375, 212.4943389892578, -7.129404067993164, -151.6762237548828, 136.49432373046875, 6.61492919921875, 9.698112487792969, 190.7774658203125, 21.17041015625, 58.583778381347656, 3.053314208984375, 58.662353515625, 87.55447387695312, 145.1089630126953, 5.804969787597656, 152.3297576904297, -16.816436767578125, 228.6936798095703, -48.65362548828125, 4.5031280517578125, 135.32598876953125, -51.90455627441406, 17.028289794921875, 54.339298248291016, 17.25014877319336, 45.211517333984375, -20.940826416015625, 162.83013916015625, 134.39340209960938, -1.3720016479492188, 220.4408721923828, 154.00457763671875, 123.87918853759766, 149.0171356201172, 175.4104766845703, 141.4302978515625, 39.9410400390625, 35.36077880859375, -57.8466796875, 107.24578857421875, -52.0157470703125, 64.67005920410156, 192.6204833984375, -3.12896728515625, 120.06394958496094, -129.99447631835938, 231.22842407226562, -31.68475341796875, 57.599578857421875, 134.5833740234375, 120.81138610839844, 343.38775634765625, -13.115859985351562, 8.97467041015625, 141.7882843017578, 79.10671997070312, 62.74530029296875, 77.44415283203125, -1.0595703125, 38.25605773925781, 111.20361328125, 4.1990966796875, 178.17578125, 193.7571563720703, 25.0552978515625, 22.68133544921875, 202.17645263671875, 197.92050170898438, -15.2706298828125, -24.440948486328125, 79.23822021484375, -108.63762664794922, 404.5921630859375, 174.0178985595703, 211.92007446289062, 62.69015121459961, 93.0108642578125, -3.836793899536133, 193.56195068359375, -62.026275634765625, 108.13751220703125, 298.2420654296875, 177.620361328125, 22.444549560546875, 136.69839477539062, 41.8885498046875, 84.66749572753906, 175.77978515625, 63.38653564453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000403.npy"}
{"epoch": 0.8439790575916231, "step": 404, "batch_size": 128, "mean": 64.17402648925781, "std": 102.3615951538086, "min": -190.80203247070312, "p10": -62.43514442443848, "median": 57.888702392578125, "p90": 187.84149169921875, "max": 337.794189453125, "pos_frac": 0.7578125, "sample": [197.76905822753906, 169.73812866210938, 20.08648681640625, 117.82624053955078, 54.335693359375, 21.62933349609375, -36.79054260253906, 1.6350345611572266, 25.641693115234375, 188.38922119140625, 3.2601852416992188, -11.49951171875, 37.95954132080078, 73.2001953125, 43.50408935546875, 4.8712005615234375, 0.45941162109375, 129.11013793945312, 195.0265655517578, 38.219482421875, 77.94355773925781, 124.63618469238281, 89.64140319824219, 113.010986328125, 36.61302185058594, 12.231887817382812, 39.38165283203125, 79.5074462890625, 147.36830139160156, 242.69070434570312, 77.59808349609375, -17.113235473632812, -113.67889404296875, 205.58621215820312, 183.25115966796875, 63.496307373046875, -62.39712142944336, 29.297470092773438, 306.437744140625, 84.342529296875, 240.9366455078125, 18.3924560546875, 16.064178466796875, 40.0286865234375, 126.81942749023438, -101.94512939453125, -9.565597534179688, 204.2640380859375, -25.747787475585938, 175.276123046875, 114.6112289428711, 21.466100692749023, -42.251747131347656, 35.99542236328125, 139.02459716796875, 129.9178466796875, 114.97244262695312, 42.90577697753906, -33.801116943359375, 63.501708984375, 137.41680908203125, -27.518516540527344, 190.13449096679688, 150.43392944335938, -108.67687225341797, 0.0, -102.49591064453125, 172.9322509765625, 170.70916748046875, -10.206222534179688, 54.42938232421875, -67.33822631835938, -1.2936553955078125, 146.29763793945312, -190.80203247070312, 222.01524353027344, -132.90370178222656, 47.374359130859375, -4.0497894287109375, 99.7703628540039, 30.582305908203125, 49.580589294433594, 35.621826171875, -42.9984130859375, 109.30789184570312, 81.96096801757812, -33.87407684326172, 105.41714477539062, 169.39410400390625, -160.334228515625, 7.6646881103515625, 81.58523559570312, 66.0372314453125, 9.956916809082031, 127.47071838378906, 152.5965576171875, 106.98638153076172, 11.096343994140625, -190.659912109375, 143.4859161376953, 152.03594970703125, 34.0750732421875, 126.0257568359375, 60.046356201171875, -88.0712890625, 93.52522277832031, -2.441802978515625, -62.52386474609375, 178.4061279296875, 156.66439819335938, -49.27915954589844, 55.731048583984375, 19.810516357421875, -29.4019775390625, 148.95022583007812, 75.40834045410156, -162.90309143066406, 301.65533447265625, 75.07577514648438, 185.79461669921875, 80.03915405273438, 257.3248291015625, -104.12156677246094, 3.682220458984375, 187.60675048828125, 143.2129669189453, 167.9640350341797, 337.794189453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000404.npy"}
{"epoch": 0.8460732984293193, "step": 405, "batch_size": 128, "mean": 68.33182525634766, "std": 96.48275756835938, "min": -133.64581298828125, "p10": -40.177618408203124, "median": 56.38397979736328, "p90": 199.59972229003904, "max": 286.8486328125, "pos_frac": 0.71875, "sample": [27.27490234375, -13.647918701171875, 57.660064697265625, 0.0, -35.948333740234375, -1.4034576416015625, 286.8486328125, 153.6240997314453, -132.41763305664062, 39.766815185546875, 191.09603881835938, 271.5299072265625, 170.40481567382812, -51.21197509765625, 38.07179260253906, 94.16058349609375, 65.26309204101562, 4.779052734375, 99.06826782226562, -18.92767333984375, -20.758560180664062, -37.47997283935547, 188.63421630859375, 92.79421997070312, 155.85568237304688, -11.758890151977539, -64.94757080078125, -130.65365600585938, 189.63165283203125, -22.104063034057617, 13.143646240234375, -14.733657836914062, -40.59832763671875, -21.35235595703125, 44.462158203125, 133.901123046875, 59.78584289550781, 21.2293701171875, -3.800039291381836, 119.85816192626953, 122.5367431640625, 176.33966064453125, 14.628244400024414, -55.25096893310547, 229.49087524414062, 146.53282165527344, 30.6268310546875, -27.49749755859375, -3.55743408203125, 196.6531982421875, 25.132644653320312, 99.0391845703125, -133.64581298828125, 110.73382568359375, 67.75469970703125, 245.705078125, 51.31005859375, 30.707000732421875, -0.7569580078125, 249.14010620117188, 29.78778076171875, 34.881622314453125, -25.877593994140625, 22.877735137939453, 135.37652587890625, 80.7054443359375, -5.065216064453125, -36.000152587890625, -12.310001373291016, 264.6550598144531, 80.35950469970703, -4.359956741333008, 107.43220520019531, 101.87385559082031, -49.4049072265625, 80.9849853515625, 129.32015991210938, 159.02694702148438, 53.33885192871094, -43.10235595703125, 94.2742919921875, 154.59402465820312, 152.9112548828125, 210.43359375, -4.3108367919921875, 45.22502136230469, 36.760467529296875, 145.633056640625, 55.65806579589844, 118.41415405273438, 206.47494506835938, 17.84716033935547, 57.109893798828125, 183.93292236328125, 148.40850830078125, -23.30322265625, 68.06204986572266, 52.26849365234375, 137.50042724609375, -39.997314453125, 35.884735107421875, 280.71392822265625, 210.563232421875, 13.57642936706543, 228.30670166015625, 90.04408264160156, 138.03562927246094, 88.9480972290039, -124.38965606689453, 111.92557525634766, 107.44546508789062, -85.76760864257812, -109.44419860839844, 156.35670471191406, 76.05430603027344, 74.87651062011719, 142.16354370117188, 39.545013427734375, 96.23388671875, 47.21452713012695, -125.69271850585938, 17.39202880859375, 246.13766479492188, 118.87686157226562, 14.149383544921875, 283.2485656738281, 42.45587158203125, 136.49899291992188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000405.npy"}
{"epoch": 0.8481675392670157, "step": 406, "batch_size": 128, "mean": 49.349586486816406, "std": 88.58106994628906, "min": -167.936767578125, "p10": -66.0358673095703, "median": 36.000980377197266, "p90": 176.5018768310547, "max": 264.34722900390625, "pos_frac": 0.7265625, "sample": [181.4381103515625, 29.107040405273438, -63.452972412109375, 120.72412109375, -167.936767578125, -41.407135009765625, 70.67108154296875, -78.0989990234375, 218.3192138671875, -15.946907043457031, 51.77995300292969, 76.51388549804688, 164.0335693359375, 96.6322250366211, -23.388153076171875, 55.80757141113281, 48.07408142089844, 59.168731689453125, 29.909637451171875, -20.032623291015625, 132.82882690429688, -24.671884536743164, 140.08428955078125, 43.6646728515625, 166.75534057617188, -12.254436492919922, -156.39584350585938, 25.54102897644043, -25.705322265625, 27.65423583984375, 206.15362548828125, -1.2444229125976562, -60.62890625, 184.286376953125, 66.53927612304688, 130.19654846191406, 48.46919250488281, -17.221107482910156, 9.967910766601562, 10.869354248046875, -23.806060791015625, -73.2890625, 31.209259033203125, 62.960723876953125, 94.07351684570312, 32.94587707519531, -125.52203369140625, 32.25311279296875, 33.87298583984375, 108.80389404296875, 216.63394165039062, 63.376670837402344, 25.31536865234375, 29.28570556640625, 176.29876708984375, 47.180419921875, 208.19944763183594, 190.96395874023438, 87.7640380859375, -85.15647888183594, 7.366153717041016, -39.771995544433594, -8.74249267578125, -44.186279296875, 98.49942779541016, 171.33416748046875, -8.366409301757812, 3.5788841247558594, -121.58859252929688, -72.0626220703125, -29.23382568359375, -80.63818359375, 135.57443237304688, 7.395843505859375, -5.590202331542969, 123.4732666015625, 90.93453979492188, 129.3921661376953, -15.129936218261719, 83.04962158203125, 23.05657958984375, 19.347900390625, 176.97579956054688, 110.21888732910156, 53.377532958984375, 29.865234375, -88.8101806640625, 201.10324096679688, 114.38580322265625, 9.6397705078125, 97.23461151123047, 26.52103042602539, -12.588165283203125, 35.449798583984375, 23.5784912109375, -63.40679931640625, -25.5179443359375, 197.49493408203125, 76.79385375976562, -127.99029541015625, 57.97657775878906, 67.07975769042969, 17.826446533203125, 151.31246948242188, 1.869781494140625, 105.85235595703125, 141.752685546875, 217.5826416015625, 7.782562255859375, 231.50814819335938, -87.30195617675781, 102.15130615234375, 36.552162170410156, 61.66278076171875, 9.187339782714844, 126.44992065429688, 30.108200073242188, 71.990966796875, 74.71087646484375, 67.09619140625, 123.25202941894531, 169.2545166015625, 27.747802734375, 264.34722900390625, 135.82766723632812, -101.5211181640625, 28.632949829101562, 53.86888122558594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000406.npy"}
{"epoch": 0.8502617801047121, "step": 407, "batch_size": 128, "mean": 69.55550384521484, "std": 102.0059814453125, "min": -196.6995849609375, "p10": -39.94913024902344, "median": 45.14509582519531, "p90": 220.48991088867186, "max": 308.61907958984375, "pos_frac": 0.703125, "sample": [9.209461212158203, 85.28274536132812, 26.29595947265625, 59.53788757324219, -56.833160400390625, 27.81390380859375, 121.64869689941406, 119.230712890625, 30.1964111328125, -11.43243408203125, 47.8345947265625, -27.25079345703125, 212.05181884765625, -18.330322265625, -11.775421142578125, 108.49212646484375, 167.0576171875, -10.701446533203125, 39.30796813964844, -96.1810302734375, -39.810546875, 9.16192626953125, 55.415740966796875, 93.11862182617188, 235.75717163085938, -16.980831146240234, 180.2260284423828, -67.9735107421875, 242.03973388671875, -70.78858947753906, 290.6815185546875, -56.32756805419922, 222.25054931640625, 18.320968627929688, 161.138916015625, 233.72470092773438, 125.2176513671875, 219.7353515625, -26.862274169921875, 30.029266357421875, 42.455596923828125, -0.9772491455078125, 42.12530517578125, 101.17384338378906, 25.97735595703125, 308.61907958984375, 81.20169067382812, 176.8131103515625, 76.37754821777344, 50.38739013671875, 23.805206298828125, 27.555328369140625, 177.835205078125, -10.055557250976562, 174.14947509765625, -149.01025390625, 197.45077514648438, -196.6995849609375, 281.23785400390625, 48.70745849609375, 66.861083984375, -43.74784851074219, 53.58873748779297, 260.2275390625, -2.56427001953125, -10.917129516601562, -8.983909606933594, 73.92111206054688, 96.25390625, -21.5850830078125, 117.96206665039062, 40.47795867919922, 203.37979125976562, 15.7852783203125, -6.2120361328125, 29.770179748535156, -23.363555908203125, 24.098709106445312, 22.562530517578125, 156.7666015625, 223.03721618652344, 126.11065673828125, 222.5640869140625, 12.126964569091797, 120.397216796875, -100.70010375976562, 113.63168334960938, 198.66143798828125, -23.3638916015625, 60.14849853515625, 234.9443359375, 7.20855712890625, 157.68032836914062, 212.82379150390625, 20.64178466796875, -2.9454879760742188, -1.5648269653320312, -5.730255126953125, -122.78240966796875, 186.29464721679688, -40.272491455078125, 232.87161254882812, 165.41668701171875, 99.95052337646484, -107.19407653808594, -33.120635986328125, -28.374893188476562, 102.1014404296875, 8.906974792480469, 67.15675354003906, 133.94175720214844, -3.6627578735351562, 110.47967529296875, 8.701194763183594, 0.0, 189.89395141601562, 257.7591552734375, -56.147125244140625, 203.11004638671875, 174.1986083984375, 8.576904296875, 122.11587524414062, 161.10748291015625, 16.783721923828125, 41.71124267578125, 97.45171356201172, 147.51541137695312, 0.0], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000407.npy"}
{"epoch": 0.8523560209424084, "step": 408, "batch_size": 128, "mean": 65.34156799316406, "std": 95.81922912597656, "min": -148.94775390625, "p10": -37.34929504394531, "median": 46.08919906616211, "p90": 196.121142578125, "max": 352.4027099609375, "pos_frac": 0.765625, "sample": [-34.221290588378906, 41.62774658203125, -66.99745178222656, 69.4398193359375, 194.4986572265625, 27.41717529296875, -113.17123413085938, 98.55299377441406, 22.947784423828125, 3.898223876953125, 62.29988098144531, -24.57936668395996, -148.94775390625, 175.89048767089844, -6.903778076171875, 48.17303466796875, -7.48370361328125, 148.52413940429688, -54.42626953125, 85.90338134765625, -78.1947021484375, 122.7098388671875, 198.60321044921875, 141.06427001953125, 182.20648193359375, 102.42618560791016, 2.3641433715820312, 45.2098388671875, 96.56681823730469, 0.0, 37.52771759033203, 201.7611083984375, 38.100685119628906, 147.56724548339844, 44.571868896484375, 204.8585205078125, 252.2261962890625, 141.91427612304688, 171.51699829101562, 42.163055419921875, -25.9095458984375, -1.6549186706542969, -27.409820556640625, -34.76573944091797, 37.081268310546875, -10.88580322265625, 102.54295349121094, 105.43350219726562, 116.10693359375, -14.866119384765625, 196.07763671875, 52.18745422363281, -117.89033508300781, 168.49160766601562, 42.457305908203125, 90.15281677246094, 35.2467041015625, 5.9627532958984375, 196.22265625, 56.34043884277344, 102.91682434082031, 2.207662582397461, -20.433151245117188, -36.25946044921875, 96.8814697265625, 178.54104614257812, -26.236221313476562, 21.556381225585938, 29.968536376953125, -113.0128173828125, -147.5638427734375, 39.49432373046875, 352.4027099609375, 33.13134002685547, 311.51312255859375, 75.55401611328125, 106.895263671875, 168.71722412109375, -69.22708129882812, 22.678802490234375, 140.15567016601562, 11.3350830078125, 102.83984375, 203.78982543945312, 107.61824035644531, -28.116302490234375, 5.8681488037109375, 44.994903564453125, 134.26910400390625, -135.94808959960938, 76.54885864257812, 44.04371643066406, 23.156494140625, 25.797607421875, 54.632171630859375, 120.54058837890625, 85.54885864257812, 133.44818115234375, -44.61810302734375, 62.99972152709961, 158.17059326171875, 94.24315643310547, -8.981277465820312, 26.837379455566406, -53.64520263671875, -16.25762939453125, 0.407745361328125, 216.78179931640625, 229.9381561279297, 118.27543640136719, 169.71258544921875, 46.183292388916016, -39.892242431640625, 51.099090576171875, 9.6712646484375, 118.71533203125, 121.68069458007812, 38.702728271484375, 21.263748168945312, 7.8336334228515625, 230.98619079589844, 46.863067626953125, 295.4637451171875, 11.055007934570312, 83.2420654296875, 192.41592407226562, 45.9951057434082, 259.8009033203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000408.npy"}
{"epoch": 0.8544502617801047, "step": 409, "batch_size": 128, "mean": 63.91953659057617, "std": 107.6934814453125, "min": -268.34246826171875, "p10": -61.58198242187499, "median": 61.11371612548828, "p90": 202.92072143554685, "max": 386.261474609375, "pos_frac": 0.734375, "sample": [156.59967041015625, 62.36054992675781, 43.73234558105469, -58.57257080078125, 8.06982421875, 23.100372314453125, 30.99224853515625, 175.0418701171875, 62.54017639160156, 44.29278564453125, 174.9689483642578, -68.60394287109375, 85.07736206054688, 167.17913818359375, 108.52374267578125, 159.22805786132812, 169.978515625, -137.07864379882812, 31.950836181640625, 2.343048095703125, 81.24658203125, 240.8128662109375, 96.38148498535156, 105.13705444335938, 82.27041625976562, 13.102279663085938, 11.61358642578125, 0.0, -134.08056640625, 3.0588912963867188, 71.63992309570312, 56.026123046875, 195.512939453125, -268.34246826171875, 104.90843200683594, 206.47100830078125, 61.84275817871094, 36.86248779296875, 238.97283935546875, 227.01748657226562, -41.2568359375, 44.74933624267578, -0.3453693389892578, 200.06304931640625, 161.17291259765625, 9.283065795898438, 224.64617919921875, -16.8148193359375, -1.5712890625, -140.60336303710938, 146.750244140625, -20.206756591796875, -9.615997314453125, 148.1338348388672, 89.13735961914062, 154.67965698242188, 79.5145034790039, 304.9912109375, 185.5172119140625, 176.15679931640625, -8.794044494628906, 138.97882080078125, 79.69121551513672, -46.46405029296875, -23.02044677734375, 133.86720275878906, 42.46856689453125, 148.27891540527344, 91.89476013183594, 244.73977661132812, -105.34219360351562, 107.1556396484375, 172.21563720703125, -70.25421142578125, 43.028076171875, -156.2659912109375, -97.58069610595703, 79.84918212890625, 11.495758056640625, -36.829368591308594, -28.478363037109375, 194.5609130859375, 4.349983215332031, 201.399169921875, 73.53005981445312, -14.3857421875, 9.128952026367188, -87.69082641601562, 139.69073486328125, -103.025390625, -26.94670867919922, 170.8616485595703, 238.8848114013672, -47.32537841796875, 6.798736572265625, 75.11512756347656, 99.54200744628906, 20.15325927734375, -2.208465576171875, 218.5283203125, -41.87565612792969, 64.17083740234375, 29.4459228515625, 60.384674072265625, 40.04063415527344, -231.231201171875, 235.58233642578125, 56.938079833984375, 221.45620727539062, 10.241058349609375, 99.18779754638672, 83.93206787109375, 386.261474609375, 208.24603271484375, 25.872398376464844, 49.88653564453125, -20.11346435546875, 101.1203384399414, 114.4189224243164, -7.4172821044921875, 63.810302734375, 36.421451568603516, 28.859588623046875, -2.6054821014404297, -109.01409912109375, 116.69020080566406, 148.17608642578125, 154.66009521484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000409.npy"}
{"epoch": 0.856544502617801, "step": 410, "batch_size": 128, "mean": 58.010379791259766, "std": 99.23165130615234, "min": -179.27442932128906, "p10": -53.001052856445305, "median": 51.77790832519531, "p90": 183.42916412353514, "max": 318.6380920410156, "pos_frac": 0.6953125, "sample": [107.2056884765625, 20.56336212158203, 7.2030487060546875, -45.396484375, 17.53466796875, 135.2467041015625, -2.3142013549804688, 138.86705017089844, -10.5194091796875, 148.99037170410156, 150.93240356445312, 51.994903564453125, -11.121490478515625, 8.105987548828125, 206.35931396484375, 243.992919921875, 8.813936233520508, -38.287811279296875, -48.5977783203125, -15.80999755859375, -179.27442932128906, 183.12484741210938, 58.55927276611328, 36.32826232910156, 167.31033325195312, 19.4498291015625, 2.189208984375, -58.881988525390625, 104.36900329589844, 44.635398864746094, -22.239227294921875, 148.82489013671875, 29.868478775024414, 103.36129760742188, -16.855976104736328, 156.24899291992188, 22.955997467041016, 116.89749145507812, 64.31588745117188, 158.5120391845703, 57.41632080078125, -92.32052612304688, 166.87774658203125, 11.223716735839844, 137.39865112304688, -118.49603271484375, 2.442108154296875, 62.8017578125, -10.438728332519531, 19.422454833984375, 85.78076171875, 129.0692596435547, 89.06236267089844, 104.74105834960938, 60.53070068359375, -50.48065185546875, -4.047760009765625, 139.48153686523438, 74.35543060302734, 52.9417724609375, 184.03509521484375, -109.908203125, 163.24432373046875, 36.363311767578125, -92.4046630859375, 166.7587890625, 176.03775024414062, -7.83740234375, 97.9541015625, 85.066162109375, 14.369522094726562, 245.20977783203125, 179.27737426757812, -0.56427001953125, 9.9364013671875, 318.6380920410156, -47.31902313232422, 189.24395751953125, 22.113723754882812, 55.633544921875, -9.308029174804688, -27.43475341796875, -66.98426818847656, 0.0, 72.17318725585938, 84.74700927734375, 252.54498291015625, 32.612762451171875, -106.6575927734375, 69.63386535644531, -177.4440460205078, 12.086097717285156, -1.5622215270996094, 208.99447631835938, 168.07403564453125, -36.59332275390625, 84.23053741455078, -30.36944580078125, 137.05255126953125, -165.35214233398438, -82.82991027832031, 136.47512817382812, -68.58399963378906, 44.27850341796875, 111.64572143554688, 183.1694793701172, 51.5609130859375, -16.452964782714844, -168.4444580078125, 75.3192138671875, 204.05955505371094, 81.44888305664062, 188.1302490234375, 29.402618408203125, 150.81512451171875, 117.15805053710938, 251.07919311523438, 82.37469482421875, 134.73648071289062, -28.65447998046875, 16.287017822265625, 102.03233337402344, -3.04486083984375, 205.07772827148438, 46.5933837890625, -15.463775634765625, 285.52105712890625, -5.879474639892578], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000410.npy"}
{"epoch": 0.8586387434554974, "step": 411, "batch_size": 128, "mean": 71.87185668945312, "std": 95.75225067138672, "min": -184.76812744140625, "p10": -31.58607711791992, "median": 56.193939208984375, "p90": 193.26117858886718, "max": 454.52484130859375, "pos_frac": 0.8046875, "sample": [-32.074317932128906, 162.66854858398438, 168.13262939453125, 65.1693115234375, 34.82081604003906, 140.3455047607422, 163.59197998046875, 98.49700927734375, 265.8291015625, 4.5406494140625, 57.47833251953125, 1.793121337890625, -31.3768310546875, 73.58139038085938, 224.33908081054688, 71.77566528320312, 114.97076416015625, 7.909149169921875, 136.98825073242188, 193.28961181640625, 14.31524658203125, 195.47763061523438, 76.50381469726562, -77.24052429199219, 35.29541015625, -149.319091796875, 111.98818969726562, 2.2484264373779297, 196.61477661132812, -6.649528503417969, 30.133880615234375, -60.55560302734375, 108.57403564453125, 5.586273193359375, 2.718780517578125, 48.89007568359375, 221.30203247070312, 171.95118713378906, 36.58599853515625, 18.9251708984375, 56.1466064453125, 176.8692626953125, 99.2012939453125, 85.31988525390625, -13.36663818359375, 4.214752197265625, 74.76494598388672, 126.8770751953125, 56.24473571777344, 186.51031494140625, 11.654296875, 160.9082794189453, 174.429931640625, -2.5769500732421875, -88.74261474609375, -36.239959716796875, 5.453948974609375, -27.286468505859375, 5.554351806640625, 15.854743957519531, 49.18170166015625, 250.5684814453125, 129.64056396484375, 166.08822631835938, 62.700897216796875, 163.62713623046875, 208.602294921875, 81.73025512695312, 96.86918640136719, 67.35928344726562, -16.049514770507812, 118.76968383789062, 36.08831787109375, 150.1007080078125, 15.412750244140625, 454.52484130859375, -38.70538330078125, 100.4410400390625, 40.088623046875, -36.232666015625, 5.948883056640625, 216.5531005859375, 63.48552703857422, 193.24899291992188, 27.435333251953125, 15.03607177734375, 55.21122741699219, 56.24127197265625, 87.0074691772461, 174.04544067382812, 108.56047058105469, -72.32026672363281, 175.49908447265625, 334.61309814453125, 77.07205200195312, 184.32131958007812, 21.385211944580078, 0.5828857421875, -37.2991943359375, -47.961181640625, 45.38580322265625, -92.54331970214844, 186.18109130859375, 73.09000396728516, 54.69482421875, -8.594207763671875, 224.1795654296875, -2.210988998413086, -7.0068206787109375, 92.76434326171875, 12.43304443359375, -184.76812744140625, 53.5450439453125, -3.568511962890625, 262.97723388671875, 52.34107971191406, 103.68785095214844, 90.95306396484375, 24.71198272705078, 35.3446044921875, 27.610198974609375, 171.3494873046875, 127.34838104248047, 54.96778869628906, 123.51904296875, -29.165435791015625, -22.5335693359375, 16.0230712890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000411.npy"}
{"epoch": 0.8607329842931937, "step": 412, "batch_size": 128, "mean": 57.04297637939453, "std": 99.0536117553711, "min": -143.0975799560547, "p10": -51.909130859375, "median": 52.83208465576172, "p90": 174.718098449707, "max": 369.20159912109375, "pos_frac": 0.671875, "sample": [-50.42486572265625, 262.70098876953125, 0.09871673583984375, 109.16064453125, 61.83868408203125, 130.42642211914062, 123.42489624023438, 99.11465454101562, -55.27642822265625, -97.5936279296875, -23.905303955078125, 57.086334228515625, -129.51611328125, -8.5721435546875, 145.02902221679688, 263.59716796875, -37.7230224609375, 285.59564208984375, 90.1571044921875, 354.84466552734375, -69.76617431640625, 83.01837158203125, 73.44732666015625, 125.74287414550781, 27.773345947265625, 60.013755798339844, 52.092315673828125, 1.54022216796875, 0.0, 173.73074340820312, -20.8699951171875, 96.4248046875, 43.4312744140625, 138.6119384765625, 91.43826293945312, 105.31521606445312, 143.09799194335938, 53.57185363769531, 71.95648193359375, 77.83755493164062, 36.82228088378906, 12.676559448242188, 53.6759033203125, 88.98764038085938, 119.30804443359375, -41.84759521484375, 177.0219268798828, -43.36865234375, 206.71295166015625, 131.5615234375, -79.40216064453125, 150.24124145507812, 13.8134765625, -1.7881755828857422, 369.20159912109375, 57.295135498046875, 110.5192642211914, -1.28411865234375, 256.48114013671875, 89.84827423095703, -4.2139892578125, 23.288345336914062, 33.225250244140625, 4.504310607910156, -9.270843505859375, -28.44622802734375, -103.11141967773438, 64.273193359375, 154.77789306640625, -22.06036376953125, 143.40719604492188, -97.90267944335938, 135.4569549560547, -4.2267608642578125, 158.92733764648438, 64.64663696289062, -12.514892578125, 57.38667297363281, 50.271446228027344, -5.83740234375, -143.0975799560547, -2.283111572265625, 49.95228576660156, 55.061981201171875, 73.12677001953125, 29.571510314941406, -3.0963287353515625, 146.135986328125, 315.975341796875, -56.44450378417969, 9.418441772460938, 188.35440063476562, -8.922172546386719, 92.85443115234375, -30.955184936523438, 207.4327392578125, 8.847305297851562, -42.77440643310547, 48.895263671875, -136.4276123046875, -14.091987609863281, 22.91986083984375, -2.1951446533203125, 2.7801971435546875, -34.36631774902344, 107.70941162109375, 141.14590454101562, 133.5054931640625, -91.78451538085938, 50.95964050292969, 237.1944580078125, 56.2171630859375, 161.99745178222656, 81.11714172363281, 9.918670654296875, -90.9998779296875, -98.35113525390625, -38.348907470703125, 148.28952026367188, 43.851043701171875, 192.24383544921875, -50.46600341796875, -46.478675842285156, 92.89163208007812, 85.39286804199219, -24.258163452148438, 105.37887573242188, 100.1741943359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000412.npy"}
{"epoch": 0.86282722513089, "step": 413, "batch_size": 128, "mean": 54.442935943603516, "std": 94.56134033203125, "min": -158.38999938964844, "p10": -66.48431243896484, "median": 35.84979248046875, "p90": 181.4875991821289, "max": 325.2397766113281, "pos_frac": 0.6953125, "sample": [22.309911727905273, -33.83374786376953, 18.760597229003906, -8.501487731933594, -68.79029846191406, 61.49928283691406, 35.71612548828125, 187.56166076660156, 83.30120849609375, 143.79913330078125, 108.67034912109375, -89.01202392578125, 72.93191528320312, 83.72421264648438, 4.9105224609375, 0.0882568359375, 81.44924926757812, 209.12249755859375, 6.447025299072266, 152.83395385742188, -3.2171554565429688, -100.74551391601562, 81.58004760742188, 79.50735473632812, 73.7078857421875, 32.5537109375, -63.4573974609375, 188.61801147460938, 4.858970642089844, 13.641525268554688, -8.255767822265625, -105.31143188476562, -5.8226318359375, 87.43873596191406, 35.98345947265625, -14.621917724609375, 245.6797332763672, 165.177978515625, 200.4561767578125, 19.84731674194336, 15.66845703125, 127.95785522460938, 129.03579711914062, 12.68756103515625, -36.47322082519531, 23.941009521484375, -0.82989501953125, -81.091064453125, 100.15380859375, -105.67855834960938, 126.73114013671875, 85.85713195800781, 130.52719116210938, -32.93172073364258, 34.72943115234375, 116.09371948242188, 164.630126953125, -37.752288818359375, 51.6617431640625, 102.5107421875, 133.3009033203125, 152.91793823242188, -111.56845092773438, 325.2397766113281, -123.35299682617188, -19.685791015625, 212.19024658203125, 166.50643920898438, 159.09637451171875, -20.188995361328125, -86.33221435546875, 119.69668579101562, -0.794403076171875, 28.690826416015625, 30.844284057617188, 166.1414794921875, 209.94989013671875, -158.38999938964844, 92.81546020507812, 35.41687774658203, -8.406654357910156, 325.0041809082031, 49.02904510498047, -45.14512634277344, 155.87548828125, 101.73236083984375, 8.72222900390625, 21.68194580078125, -2.9183521270751953, -35.898406982421875, 208.92459106445312, -7.317529678344727, 131.62074279785156, 227.38482666015625, 42.901145935058594, -95.78948974609375, 163.97457885742188, -40.3609619140625, -26.657669067382812, 88.08169555664062, 98.68963623046875, -47.237518310546875, 59.73785400390625, 82.13482666015625, 98.15365600585938, -43.438621520996094, 73.67730712890625, 26.913360595703125, 216.95211791992188, 153.34910583496094, -6.3548583984375, 113.32830047607422, 8.069732666015625, 82.29994201660156, -6.677803039550781, 17.644378662109375, 84.946533203125, 31.822494506835938, 13.705596923828125, 76.40521240234375, -65.49603271484375, 67.22357177734375, 220.26220703125, -69.51315307617188, 178.88442993164062, 74.19894409179688, -92.227294921875, 18.27197265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000413.npy"}
{"epoch": 0.8649214659685864, "step": 414, "batch_size": 128, "mean": 68.52609252929688, "std": 99.59636688232422, "min": -193.84515380859375, "p10": -38.70213928222656, "median": 55.83991622924805, "p90": 188.88783264160156, "max": 433.86773681640625, "pos_frac": 0.796875, "sample": [54.991661071777344, -73.48906707763672, 25.468772888183594, 100.65701293945312, 48.960113525390625, 118.03115844726562, -159.52090454101562, 85.62384033203125, -20.561279296875, -102.03065490722656, 107.66839599609375, -9.824848175048828, 69.10845947265625, 213.6351318359375, -25.100799560546875, 0.93487548828125, 18.42041015625, 55.72935485839844, 192.19178771972656, 78.03236389160156, -30.791549682617188, 63.57536315917969, -18.575393676757812, 93.04942321777344, -44.9132080078125, 49.27960205078125, -193.84515380859375, 433.86773681640625, 256.0700378417969, 56.752716064453125, 5.985752105712891, 16.272323608398438, 6.633415222167969, 131.57403564453125, 90.53472900390625, 48.9302978515625, 165.1332550048828, -12.2242431640625, 141.94422912597656, 80.90361022949219, -108.7720947265625, -77.48175048828125, 185.47418212890625, 9.8272705078125, 10.730920791625977, 300.58758544921875, 100.47068786621094, 42.65837097167969, 71.5953369140625, 18.466110229492188, 160.73269653320312, -18.597068786621094, 18.898727416992188, 98.69329833984375, 240.13192749023438, 55.950477600097656, 282.12261962890625, 139.3980255126953, 126.953125, 2.9787673950195312, -40.14044189453125, 260.39013671875, 31.3177490234375, 89.45388793945312, -38.085723876953125, -24.59100341796875, -7.56268310546875, 24.867218017578125, 5.174652099609375, 189.45596313476562, 47.298126220703125, 40.871551513671875, 141.74951171875, -58.58551025390625, -5.79058837890625, 28.7039794921875, 230.74920654296875, 141.80191040039062, 174.8876953125, 51.253509521484375, 81.74044799804688, 188.64434814453125, 61.53019714355469, 168.03353881835938, 4.232114791870117, 38.01130676269531, 71.23622131347656, 32.80516815185547, -30.525741577148438, 76.5946044921875, -129.91018676757812, 43.40606689453125, -48.34269714355469, -0.7396240234375, -132.04681396484375, 14.12847900390625, 271.92376708984375, 42.5487060546875, 15.881500244140625, -52.13386154174805, 11.288772583007812, 254.93402099609375, 103.81268310546875, 5.174571990966797, 157.85861206054688, 169.01014709472656, 1.6859703063964844, 79.15695190429688, 44.09991455078125, 127.63031005859375, 6.540771484375, 158.06837463378906, 163.50830078125, 7.3251495361328125, 149.15948486328125, 90.90609741210938, 185.3981170654297, 179.13357543945312, 58.09130859375, 101.29700469970703, 77.39205932617188, 143.281494140625, 11.733341217041016, 121.9749755859375, 100.47897338867188, 244.9456787109375, 104.48382568359375, 132.83016967773438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000414.npy"}
{"epoch": 0.8670157068062827, "step": 415, "batch_size": 128, "mean": 62.18010330200195, "std": 86.8384780883789, "min": -212.2364501953125, "p10": -46.26404151916503, "median": 55.446693420410156, "p90": 185.25797119140626, "max": 313.9624938964844, "pos_frac": 0.7578125, "sample": [18.761384963989258, 120.42103576660156, 163.32403564453125, 17.114818572998047, 197.05413818359375, 18.9649658203125, -128.5467529296875, 28.8079833984375, 313.9624938964844, 6.014007568359375, 104.1973876953125, 59.849693298339844, 185.1865234375, -14.028091430664062, -45.1932373046875, 71.36019897460938, 194.4002685546875, -14.38934326171875, 80.45742797851562, 130.953125, 102.07098388671875, 73.100830078125, 175.524658203125, -212.2364501953125, 111.922119140625, -8.699676513671875, 59.1103515625, -111.87139892578125, 157.22776794433594, 24.94512939453125, -75.37191772460938, 126.21649169921875, -63.90630340576172, -7.3610687255859375, 95.15520477294922, 16.4195556640625, 93.5732421875, 252.56732177734375, 67.08416748046875, 31.077362060546875, 140.42454528808594, 35.89599609375, 185.4246826171875, -7.4538421630859375, -82.427490234375, -28.360992431640625, 89.17897033691406, 94.5416259765625, -3.948394775390625, -0.41274452209472656, -58.38262939453125, 108.06787109375, 169.98123168945312, 53.33403778076172, 111.02001953125, -38.550384521484375, 104.02880859375, -59.148345947265625, -98.406494140625, 47.45002365112305, 43.34236145019531, 123.09967041015625, 189.43634033203125, 33.07189178466797, -0.689117431640625, 14.872367858886719, -0.289886474609375, -48.7625846862793, -7.301124572753906, 103.2635726928711, 4.78863525390625, 48.9786376953125, 60.33045959472656, 53.811187744140625, 64.26568603515625, 149.389404296875, 12.0389404296875, 158.99078369140625, 82.62176513671875, -23.710235595703125, 185.74758911132812, 119.77435302734375, 9.1146240234375, 6.601593017578125, 48.043426513671875, 153.70034790039062, -37.62019348144531, 21.0286865234375, 108.96719360351562, 49.9066162109375, 59.37742614746094, 27.31689453125, -55.53094482421875, 93.2420654296875, 179.91897583007812, 46.54833984375, 112.71897888183594, 106.97906494140625, 43.31627655029297, -12.090217590332031, 18.87529754638672, -61.509727478027344, 155.1971435546875, 69.41839599609375, 235.51966857910156, 98.99887084960938, -22.44061279296875, 194.63702392578125, 55.29888916015625, 54.31886291503906, 19.643890380859375, 37.619110107421875, 26.673095703125, 138.6484375, 73.55111694335938, -71.75836181640625, 198.35902404785156, 103.81642150878906, 215.2625732421875, 55.59449768066406, 113.36233520507812, 189.6240234375, 135.3018798828125, -29.138427734375, 146.86129760742188, 85.88253784179688, 3.0622406005859375, 212.2864990234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000415.npy"}
{"epoch": 0.8691099476439791, "step": 416, "batch_size": 128, "mean": 73.05046081542969, "std": 100.4564208984375, "min": -226.583251953125, "p10": -37.78658561706542, "median": 57.722625732421875, "p90": 212.9567428588867, "max": 276.5634765625, "pos_frac": 0.75, "sample": [235.33726501464844, 234.68536376953125, 240.71163940429688, 52.74859619140625, 214.2734832763672, 6.73114013671875, 37.95292282104492, 178.71755981445312, 99.28457641601562, 215.28768920898438, 276.5634765625, -11.076416015625, 150.14523315429688, 172.75408935546875, 20.1741943359375, 175.8513641357422, 185.85272216796875, 47.270660400390625, 110.01026916503906, 148.76422119140625, 264.3277893066406, 8.221420288085938, 104.05360412597656, 204.18218994140625, -14.1026611328125, 25.10992431640625, 141.8702392578125, -54.48273849487305, 64.78416442871094, -23.418701171875, 5.26202392578125, 92.37583923339844, 21.269493103027344, 17.615615844726562, -46.82269287109375, 117.48833465576172, -220.65493774414062, 190.9305419921875, 164.29745483398438, 29.25274658203125, 123.16454315185547, 72.17109680175781, 26.553634643554688, 150.7783660888672, 33.55816650390625, 48.42302703857422, -60.43013000488281, 206.34378051757812, -31.464691162109375, 168.96412658691406, 2.03204345703125, -10.680633544921875, -106.3568115234375, 62.24034118652344, 200.12945556640625, 17.276580810546875, -1.149261474609375, 71.3634033203125, -0.9005126953125, 89.56304931640625, 77.47918701171875, 132.47698974609375, 92.03627014160156, -23.07439422607422, -51.00578308105469, 132.8623504638672, -31.53192138671875, 186.5738525390625, 102.76742553710938, 17.385536193847656, -76.29481506347656, -226.583251953125, -64.91705322265625, 225.44752502441406, -31.87677001953125, 76.031494140625, 130.3605499267578, 95.57727813720703, -32.227020263671875, 52.709075927734375, 49.59423828125, 234.9119415283203, 270.7416076660156, 27.773212432861328, 106.71295166015625, -45.710205078125, 165.50244140625, -19.83635711669922, -45.2030143737793, 21.397430419921875, 110.85444641113281, 162.56988525390625, 59.261749267578125, -11.707275390625, 161.24139404296875, 3.8477859497070312, 28.526519775390625, 16.422073364257812, -27.65643310546875, 31.86432647705078, -112.52774047851562, 221.08334350585938, -7.7730865478515625, 212.39242553710938, -16.12458038330078, 19.350982666015625, 164.12669372558594, 194.4731903076172, 56.183502197265625, 110.11441040039062, -4.175262451171875, 30.37408447265625, 24.86865234375, 125.52764892578125, 75.35369873046875, -34.608116149902344, 243.93829345703125, 85.65048217773438, -15.757484436035156, 195.9727783203125, 47.73931884765625, 162.76849365234375, -89.15707397460938, 161.63844299316406, 215.35455322265625, 47.78401184082031, 29.85224151611328, 175.55039978027344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000416.npy"}
{"epoch": 0.8712041884816754, "step": 417, "batch_size": 128, "mean": 66.60511016845703, "std": 102.07830810546875, "min": -196.63626098632812, "p10": -65.13788757324218, "median": 58.93611145019531, "p90": 202.31461181640623, "max": 344.67718505859375, "pos_frac": 0.7421875, "sample": [131.03378295898438, 206.5487060546875, 74.91604614257812, 87.69212341308594, 88.8087158203125, 115.5200424194336, 33.992584228515625, 280.0992736816406, 21.102432250976562, -85.42668151855469, -58.31207275390625, 72.3292236328125, 15.321609497070312, 104.40896606445312, 124.87574005126953, -7.636579513549805, 121.20468139648438, -67.65972900390625, 196.7869873046875, 72.13449096679688, -73.09808349609375, 117.9168701171875, 309.5369873046875, 45.738006591796875, 139.47581481933594, 0.0, 73.87225341796875, 82.45179748535156, 9.384498596191406, 200.5, 149.02853393554688, 160.84112548828125, -33.05747985839844, 274.1318054199219, 49.78648376464844, -22.7923583984375, 47.397216796875, -65.36605834960938, 144.55215454101562, 164.40731811523438, 243.35162353515625, 34.9976806640625, 247.7769775390625, -109.06658935546875, -2.50390625, -60.6033935546875, -76.85818481445312, 166.2665557861328, -196.63626098632812, 10.616737365722656, -72.86221313476562, 89.40029907226562, 107.19439697265625, -69.73724365234375, -131.89385986328125, 5.674461364746094, 132.62904357910156, 46.420074462890625, 102.27711486816406, 12.740615844726562, 2.965606689453125, 78.27992248535156, 221.29229736328125, -4.881290435791016, 243.91250610351562, 180.86175537109375, 54.933929443359375, 145.4810791015625, 43.73388671875, 44.492584228515625, -33.90034484863281, 4.05126953125, -7.085573196411133, -95.25543212890625, 61.1552734375, 21.785724639892578, -40.7967529296875, 134.83016967773438, 29.276901245117188, -33.498046875, 200.24786376953125, 46.41912841796875, 87.42413330078125, 6.9547119140625, 139.02186584472656, 60.271759033203125, 241.56494140625, -3.172454833984375, 25.591033935546875, 30.075897216796875, -51.899566650390625, 181.43170166015625, 221.321044921875, 76.4415283203125, 129.86917114257812, -65.04010009765625, 112.40814971923828, 106.31417846679688, -98.40336608886719, 10.856700897216797, 87.80508422851562, 104.88687133789062, 25.049713134765625, 77.127685546875, -62.25408935546875, 283.8069152832031, 197.34912109375, -9.42010498046875, 57.6004638671875, 160.20068359375, -3.7707881927490234, -46.27576446533203, 12.761581420898438, 150.5428466796875, 0.0, 3.23040771484375, -121.88900756835938, 40.81915283203125, 74.02618408203125, 6.445526123046875, 88.68014526367188, 175.24215698242188, 344.67718505859375, 82.43215942382812, 184.34837341308594, 43.69598388671875, 209.459228515625, 67.9114990234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000417.npy"}
{"epoch": 0.8732984293193717, "step": 418, "batch_size": 128, "mean": 62.140403747558594, "std": 99.09811401367188, "min": -172.47979736328125, "p10": -62.637203979492185, "median": 54.97100067138672, "p90": 189.82700195312498, "max": 291.77850341796875, "pos_frac": 0.71875, "sample": [-157.03981018066406, 172.32888793945312, 65.6700439453125, 142.01873779296875, 250.9243927001953, 156.9080810546875, 37.054412841796875, 226.58798217773438, 178.82223510742188, -91.66725158691406, -12.751556396484375, 8.45263671875, 117.52752685546875, -13.202728271484375, -4.96954345703125, 291.77850341796875, 192.94384765625, 9.743240356445312, 154.8146514892578, -47.931541442871094, 205.46463012695312, 69.6653060913086, 3.7158737182617188, 182.19833374023438, 115.30386352539062, 188.4912109375, -62.33331298828125, 63.363922119140625, 53.952850341796875, 12.557493209838867, 55.98915100097656, 75.09590148925781, 22.74041748046875, -48.23588562011719, 226.1885986328125, 167.7608642578125, -12.362319946289062, 156.06765747070312, -72.4857177734375, 34.802833557128906, 237.323974609375, 106.0706787109375, -5.542236328125, 9.313867568969727, 103.68434143066406, 165.32579040527344, 90.26213073730469, -1.54205322265625, 9.683334350585938, 49.63453674316406, -23.269012451171875, 40.19390869140625, -74.50750732421875, 7.1436004638671875, 118.56254577636719, 122.365478515625, 158.61636352539062, 229.30357360839844, 143.47970581054688, -26.651165008544922, 256.47882080078125, 108.875, 7.789451599121094, 264.3827209472656, -145.018798828125, -165.43966674804688, 100.81999206542969, 98.74447631835938, -23.51055145263672, 99.30499267578125, -8.40435791015625, 134.47918701171875, -19.67529296875, -37.308624267578125, -19.415863037109375, 188.46461486816406, 28.88916015625, 91.53424072265625, 141.94793701171875, 38.95745849609375, 31.85779571533203, 23.31696319580078, 50.87939453125, 34.68421173095703, -138.45831298828125, 50.574981689453125, 59.428558349609375, 171.94607543945312, 48.94781494140625, -39.48974609375, -23.2535400390625, -85.8463134765625, 84.01382446289062, -29.044021606445312, -31.3812255859375, 67.43453979492188, 61.649078369140625, -9.7520751953125, 60.7674560546875, 47.79316711425781, -2.8570709228515625, 26.46826171875, -91.65118408203125, 22.481101989746094, 40.9378662109375, 251.49658203125, 136.064453125, 58.7010498046875, -88.85931396484375, 126.09657287597656, 143.92816162109375, 95.42349243164062, 194.5179443359375, -59.267364501953125, 170.75970458984375, -63.346282958984375, -172.47979736328125, 102.26556396484375, 70.70211029052734, 119.3365478515625, 18.218902587890625, 147.974853515625, 146.25875854492188, 73.09004211425781, 39.97099304199219, 154.43154907226562, 207.5289306640625, -67.58990478515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000418.npy"}
{"epoch": 0.875392670157068, "step": 419, "batch_size": 128, "mean": 72.84529113769531, "std": 107.21995544433594, "min": -360.54168701171875, "p10": -61.475730895996094, "median": 77.2320556640625, "p90": 202.79902954101559, "max": 392.26416015625, "pos_frac": 0.7421875, "sample": [116.78634643554688, -24.69244384765625, -13.398147583007812, 215.40142822265625, 52.936649322509766, -23.767440795898438, 29.7706298828125, 26.852569580078125, 197.91339111328125, 34.50550079345703, 94.70492553710938, 161.3870849609375, 61.365966796875, 129.72994995117188, 130.057861328125, 244.5361785888672, 179.32244873046875, -89.32847595214844, 224.5625, 259.7969665527344, 167.52859497070312, 79.62176513671875, 146.32542419433594, 268.15460205078125, 26.108184814453125, 137.97946166992188, 172.259521484375, 153.30226135253906, 150.05831909179688, 214.1988525390625, -360.54168701171875, 47.9246826171875, 85.22023010253906, 76.70294189453125, 19.707313537597656, 124.23773193359375, 248.5633544921875, -1.3314743041992188, 119.8056640625, -149.90438842773438, 137.6556396484375, 117.29254150390625, 123.65594482421875, 72.70228576660156, -20.84101104736328, 83.84672546386719, 137.50289916992188, 114.51211547851562, -16.9981689453125, 92.7271957397461, -73.04029846191406, 90.4854736328125, 153.38690185546875, -3.95831298828125, 296.0299377441406, -33.57649230957031, -6.2487945556640625, 60.29624938964844, -23.853073120117188, 185.8131103515625, -148.00326538085938, 91.69091796875, 25.966262817382812, -2.01177978515625, 36.19609069824219, -28.419647216796875, -60.592838287353516, 77.76116943359375, 135.5635986328125, -60.69490051269531, 72.37655639648438, 80.32171630859375, 190.3323974609375, 105.668212890625, 96.16189575195312, 20.97021484375, 392.26416015625, 154.26251220703125, 132.25860595703125, 81.74005126953125, 179.01028442382812, 43.799774169921875, 73.76655578613281, 0.0, 131.24383544921875, -65.3612060546875, 69.58306884765625, 170.3657989501953, 112.129150390625, -0.0949249267578125, 196.09507751464844, 46.308349609375, 84.5439453125, 166.35494995117188, 4.89837646484375, 27.3482666015625, -120.90634155273438, 169.8621826171875, 24.652183532714844, 93.70672607421875, 215.37283325195312, 11.934478759765625, 237.71780395507812, -113.91241455078125, -43.282623291015625, -84.25289916992188, 227.9042205810547, 144.16171264648438, -68.94410705566406, 36.212371826171875, 47.161865234375, -41.47306442260742, -25.92340087890625, -88.8126220703125, 4.61846923828125, 0.0, 81.42645263671875, 191.7233123779297, -63.29766845703125, -115.96670532226562, 174.13470458984375, 49.760101318359375, 7.9393157958984375, 225.95037841796875, 129.86474609375, 48.602317810058594, 72.507080078125, 40.1956787109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000419.npy"}
{"epoch": 0.8774869109947644, "step": 420, "batch_size": 128, "mean": 77.0727310180664, "std": 114.14236450195312, "min": -239.4014892578125, "p10": -48.68480529785155, "median": 66.73988723754883, "p90": 222.97119750976563, "max": 387.6334533691406, "pos_frac": 0.7578125, "sample": [-23.658390045166016, 16.43133544921875, 81.10263061523438, -196.8782958984375, 165.2047576904297, 168.93234252929688, 27.61065673828125, 28.817764282226562, 160.9914093017578, -26.753982543945312, 139.33090209960938, -57.060943603515625, 109.31581115722656, 83.40518188476562, -39.11244201660156, 135.0107421875, 34.57804870605469, 22.71240234375, 24.716705322265625, 181.27935791015625, 22.9505615234375, 100.47607421875, 222.59414672851562, -15.532958984375, -7.15008544921875, 6.575725555419922, 243.069580078125, -71.22698974609375, 111.50096130371094, 202.94308471679688, 144.11070251464844, 32.874481201171875, -45.09503173828125, 65.70303344726562, 33.271270751953125, 182.2998046875, 127.5023193359375, 318.000244140625, 133.69369506835938, 44.203369140625, 173.89837646484375, 98.42098236083984, -26.72998809814453, 53.68585205078125, 118.00267028808594, 243.3385467529297, 128.22372436523438, -0.8219451904296875, 302.35772705078125, 169.80162048339844, 97.86077880859375, 10.455490112304688, 246.96905517578125, 129.27854919433594, -210.69595336914062, 42.909141540527344, 278.328857421875, 147.32577514648438, -26.64215087890625, 15.84494400024414, 114.56600952148438, -120.3511962890625, 256.60614013671875, 85.26594543457031, -3.603271484375, 139.7640380859375, 128.56051635742188, 4.9337158203125, 104.282958984375, 28.265640258789062, -33.88328552246094, 280.23480224609375, -76.156982421875, 387.6334533691406, 83.83468627929688, 57.112953186035156, 0.0, 28.22998046875, -181.49472045898438, -38.750999450683594, 6.8367919921875, 191.9392547607422, -39.814208984375, 253.49830627441406, 124.49590301513672, 123.85653686523438, 181.37921142578125, 7.9486083984375, 223.85098266601562, 176.05538940429688, -8.5291748046875, 15.505706787109375, 156.98080444335938, 200.80816650390625, -78.9505615234375, 67.77674102783203, -89.17984008789062, 29.879852294921875, 31.627532958984375, 24.732498168945312, 115.80987548828125, 143.66055297851562, 127.86349487304688, -2.8819732666015625, 80.38552856445312, 44.8514404296875, -83.23684692382812, 208.46701049804688, 184.87864685058594, 176.93246459960938, 1.53350830078125, -9.145706176757812, -22.58594512939453, 61.544708251953125, 266.9041748046875, 36.71443176269531, -239.4014892578125, -84.49747467041016, 135.239501953125, 140.9050750732422, 368.4298095703125, 154.053466796875, 117.20326232910156, 44.5137939453125, 6.0788726806640625, 192.6544189453125, -64.32333374023438, 36.457061767578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000420.npy"}
{"epoch": 0.8795811518324608, "step": 421, "batch_size": 128, "mean": 63.78009033203125, "std": 93.51058197021484, "min": -234.73480224609375, "p10": -49.62861022949218, "median": 64.30415344238281, "p90": 188.02911987304688, "max": 291.4152526855469, "pos_frac": 0.7734375, "sample": [209.6644287109375, 17.777420043945312, 137.4861602783203, 2.596435546875, 92.99214172363281, 71.72013092041016, 34.70661926269531, 36.32902526855469, 70.09893798828125, -34.22309875488281, 83.05050659179688, 75.12060546875, 187.73199462890625, 138.09576416015625, 165.4383544921875, -143.00656127929688, 180.58526611328125, -37.59680938720703, 143.3654022216797, 118.62347412109375, 80.23370361328125, -46.08456039428711, -100.76226806640625, 63.09443664550781, 163.8934783935547, -61.541847229003906, 84.94683837890625, 241.6766357421875, 105.77456665039062, 114.20651245117188, -76.71537780761719, 175.98910522460938, 16.70880126953125, 34.19761657714844, 55.63763427734375, 18.671356201171875, 157.0634765625, 128.13250732421875, 128.7552032470703, 139.1307373046875, 19.133331298828125, -94.90754699707031, 53.601806640625, 129.853515625, 89.16973876953125, 69.34588623046875, 39.059722900390625, 252.88287353515625, 188.722412109375, 241.92674255371094, 190.84597778320312, 291.4152526855469, -56.6053466796875, -46.98921203613281, 49.140716552734375, 72.97679138183594, -6.9951171875, 24.797637939453125, 138.49789428710938, -51.946197509765625, 90.57869720458984, 0.0, -30.670440673828125, -48.768280029296875, -82.9302978515625, -124.34740447998047, 163.08206176757812, -37.648040771484375, 157.43576049804688, 183.81900024414062, 53.2774658203125, 30.598758697509766, 101.51029205322266, 271.3030700683594, 168.147705078125, 65.88629150390625, -10.744346618652344, 175.77236938476562, 14.775848388671875, 81.59268188476562, 46.111419677734375, 103.12774658203125, 30.34391212463379, 107.98839569091797, 62.21929931640625, 68.072509765625, 64.98379516601562, 7.61181640625, 99.83966064453125, -43.45738983154297, 29.08696746826172, 3.595001220703125, 208.03378295898438, 191.78436279296875, 58.76031494140625, 2.2195072174072266, -65.97264099121094, 98.66122436523438, 69.93267822265625, 54.091285705566406, -12.396087646484375, 29.833473205566406, 63.62451171875, -51.63604736328125, 76.50971984863281, 88.27892303466797, 0.0, -234.73480224609375, 36.63714599609375, 19.427139282226562, 253.955078125, 73.93307495117188, 46.76649475097656, 30.6978759765625, 85.09928894042969, 75.22486114501953, 144.95156860351562, -173.49566650390625, -20.685638427734375, 63.157073974609375, 188.9637451171875, 88.01467895507812, 5.821317672729492, 60.88525390625, -19.722457885742188, 80.20635986328125, 203.0242919921875, -25.685516357421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000421.npy"}
{"epoch": 0.881675392670157, "step": 422, "batch_size": 128, "mean": 69.3071060180664, "std": 105.54927825927734, "min": -239.36859130859375, "p10": -49.7024673461914, "median": 59.58625793457031, "p90": 201.43055419921873, "max": 350.2178955078125, "pos_frac": 0.703125, "sample": [29.3009033203125, -239.36859130859375, 103.6478271484375, -103.57017517089844, -7.3543853759765625, 328.5491943359375, -8.671966552734375, 156.60842895507812, -21.15802001953125, -0.43231201171875, -2.74200439453125, 151.19464111328125, 123.43328857421875, 183.75558471679688, 112.204345703125, 98.94122314453125, 185.76278686523438, 27.61224365234375, 12.495559692382812, 78.93589782714844, 28.91473388671875, 45.55570983886719, 253.1822509765625, -26.389625549316406, 223.6732177734375, -26.454368591308594, 144.51158142089844, 46.91210174560547, 178.79824829101562, 180.69024658203125, -46.951385498046875, 86.94021606445312, 111.33514404296875, -44.63975524902344, 53.990234375, -108.03170776367188, 1.3845787048339844, -90.70613098144531, 159.83514404296875, -10.600677490234375, 120.67362976074219, 46.788177490234375, 75.33503723144531, 47.68882751464844, 133.73898315429688, 35.74734878540039, 98.87982177734375, 58.7052001953125, -19.470779418945312, 221.126953125, 124.26205444335938, -71.80331420898438, 180.4158477783203, 254.7076873779297, 245.7586669921875, 125.19146728515625, 160.2633056640625, 135.21578979492188, 101.7628173828125, -18.301544189453125, -89.21528625488281, 207.67620849609375, 44.60284423828125, 3.8812942504882812, 69.79367065429688, -15.816864013671875, 150.4095458984375, 89.25032043457031, -10.849777221679688, 170.8065185546875, -5.961212158203125, 81.0775146484375, -59.600616455078125, 328.5780334472656, -122.30165100097656, 170.74887084960938, -56.12165832519531, 95.59082794189453, -36.92718505859375, 49.2828369140625, 249.88699340820312, -26.709381103515625, 67.47760009765625, 17.892906188964844, 16.7183837890625, 136.80364990234375, 131.86947631835938, -8.6004638671875, 147.20358276367188, 67.17166137695312, 12.981761932373047, -22.4949951171875, 17.2955322265625, 117.02677154541016, -5.033012390136719, 350.2178955078125, 172.13262939453125, 95.783935546875, 60.467315673828125, -132.23068237304688, 12.42962646484375, -166.97088623046875, 34.28125, 198.65371704101562, 133.09329223632812, 198.75384521484375, 35.23153305053711, -7.462921142578125, 230.77520751953125, 56.156925201416016, -15.261581420898438, -132.32443237304688, 136.79478454589844, 14.864151000976562, 208.9521026611328, 48.131683349609375, -13.17193603515625, 196.07623291015625, 0.0, -66.81072998046875, 122.96987915039062, -14.4688720703125, 123.78887939453125, 112.95236206054688, 86.52484130859375, 98.57627868652344, 28.76688003540039, 251.46173095703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000422.npy"}
{"epoch": 0.8837696335078534, "step": 423, "batch_size": 128, "mean": 74.56527709960938, "std": 98.19903564453125, "min": -190.3477783203125, "p10": -45.72375488281249, "median": 66.22330665588379, "p90": 199.10138549804688, "max": 399.01470947265625, "pos_frac": 0.7890625, "sample": [7.0383148193359375, 399.01470947265625, -55.8079833984375, 30.453155517578125, 69.42047119140625, 39.922332763671875, 44.353668212890625, 162.8291015625, 195.291259765625, 195.608642578125, 133.62542724609375, 55.5791015625, 16.753662109375, -1.017578125, 239.10488891601562, -101.26812744140625, -72.16535186767578, 149.08847045898438, -34.73186492919922, 72.71017456054688, 1.3970718383789062, -42.366424560546875, 105.99441528320312, 75.0345458984375, 35.570068359375, 179.23092651367188, 49.37901306152344, 214.93035888671875, 7.248748779296875, 60.203857421875, 16.172977447509766, 103.8551025390625, 165.26837158203125, 78.65509033203125, 136.2960662841797, -43.736328125, 293.6494140625, 23.848724365234375, 19.106536865234375, 109.09024047851562, 18.448726654052734, 17.082275390625, 42.61888122558594, 163.61785888671875, -190.3477783203125, 110.0933837890625, 139.36041259765625, -21.195770263671875, -1.971261978149414, 169.56295776367188, 91.43521118164062, 107.79866027832031, 44.82025146484375, 96.51397705078125, 160.440673828125, 167.71583557128906, 200.91064453125, 25.001434326171875, 154.37173461914062, 43.022361755371094, 153.812255859375, 58.60008239746094, 254.31759643554688, 44.019927978515625, -13.46221923828125, 160.3561553955078, 28.3524169921875, 37.57781982421875, 141.15707397460938, 215.19805908203125, -16.070281982421875, 124.45407104492188, -87.37738037109375, 130.537841796875, 178.6097412109375, 77.76644897460938, -23.22027587890625, -80.67231750488281, 142.92449951171875, 178.518798828125, 102.05609130859375, 89.74331665039062, 49.53581237792969, 80.04965209960938, 109.76153564453125, -148.9423828125, 25.135772705078125, 192.17828369140625, 10.618255615234375, 237.06695556640625, 228.01235961914062, 114.25897216796875, 177.803466796875, 182.14785766601562, 208.7996826171875, -17.7109375, 52.27264404296875, 82.77284240722656, -50.361083984375, -14.848968505859375, 19.20806884765625, 4.591667175292969, 30.630386352539062, 204.7938232421875, 135.35940551757812, 198.32598876953125, -7.531280517578125, 7.7689208984375, 63.02614212036133, 15.158248901367188, 93.20964050292969, 103.53767395019531, -103.59698486328125, 99.49276733398438, 222.69036865234375, -52.005706787109375, 156.42578125, 14.972206115722656, -57.16835021972656, 6.467836380004883, 85.61614990234375, -54.372222900390625, 222.49539184570312, 188.09310913085938, 21.2919921875, -42.322296142578125, -12.637819290161133, -108.8475341796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000423.npy"}
{"epoch": 0.8858638743455497, "step": 424, "batch_size": 128, "mean": 70.28209686279297, "std": 93.63748168945312, "min": -175.5469970703125, "p10": -33.59558410644531, "median": 67.98162841796875, "p90": 172.4288787841797, "max": 436.5989685058594, "pos_frac": 0.7890625, "sample": [54.82049560546875, 247.18638610839844, 147.02464294433594, -58.63726806640625, 150.79412841796875, 13.233148574829102, 135.21295166015625, 95.74546813964844, 11.05438232421875, 101.594482421875, 167.39877319335938, 185.1158905029297, -46.558265686035156, 39.6683349609375, -54.94953536987305, 67.30377197265625, 272.7015380859375, 166.02645874023438, 155.9893798828125, 85.693359375, 107.23455810546875, 108.39248657226562, 159.02218627929688, 72.83837890625, 108.91427612304688, 112.14179992675781, -12.497177124023438, -3.7600021362304688, 129.71749877929688, 111.9373779296875, -148.10992431640625, 5.627519607543945, 49.657222747802734, 6.738555908203125, 72.70904541015625, -16.74591064453125, 127.60321044921875, 140.92288208007812, 187.514892578125, -25.1146240234375, -9.452926635742188, 49.1514892578125, 126.525146484375, 171.63790893554688, 25.22076416015625, 73.9337158203125, 11.26104736328125, 260.6632995605469, -174.126953125, 12.79620361328125, -32.2982177734375, 178.39559936523438, 0.9730262756347656, -10.730804443359375, 40.960174560546875, 70.96578979492188, 85.30622863769531, -13.086713790893555, -36.622772216796875, 0.0, 15.431501388549805, 42.819183349609375, 49.680511474609375, 436.5989685058594, 28.94140625, 38.30023193359375, 7.2784423828125, -23.850677490234375, 68.65948486328125, 105.34602355957031, 135.95913696289062, -175.5469970703125, 95.07656860351562, 116.24995422363281, 39.27562713623047, -8.793701171875, -9.090621948242188, 145.9676513671875, 111.26620483398438, 79.45794677734375, 120.94999694824219, 174.27447509765625, 166.09927368164062, 80.07354736328125, 105.3697509765625, 19.766468048095703, -45.883846282958984, 3.107696533203125, 9.748672485351562, 43.525177001953125, 10.21380615234375, 157.24887084960938, 51.632293701171875, 166.4967041015625, -1.419921875, 94.3175277709961, 300.261962890625, 149.4934844970703, -108.81500244140625, 150.82211303710938, 0.14197540283203125, 187.625732421875, 168.05767822265625, 139.36151123046875, 140.7977294921875, 126.13179016113281, 106.0499267578125, 207.7762451171875, -4.8750457763671875, -37.4498291015625, 139.36849975585938, 62.124114990234375, -50.44335174560547, 184.34744262695312, 129.8467559814453, 4.570098876953125, -82.98377990722656, 7.9500274658203125, 38.85877227783203, -93.03205871582031, 9.639884948730469, 18.057769775390625, 48.950321197509766, 166.32321166992188, 194.8193359375, 43.814117431640625, 10.85123062133789, 120.48367309570312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000424.npy"}
{"epoch": 0.8879581151832461, "step": 425, "batch_size": 128, "mean": 58.470848083496094, "std": 105.1636962890625, "min": -201.99911499023438, "p10": -65.63017196655274, "median": 45.916603088378906, "p90": 188.13565673828126, "max": 411.1964111328125, "pos_frac": 0.7421875, "sample": [105.10894775390625, 180.56591796875, 100.28860473632812, 15.549468994140625, -104.05975341796875, 52.75685119628906, 154.61822509765625, -8.842611312866211, 62.60612487792969, 243.614013671875, 5.51188850402832, 39.9268798828125, -106.78360748291016, 182.06719970703125, 79.24185180664062, 134.62522888183594, 209.9677734375, -35.46800231933594, 53.27320861816406, 2.7914257049560547, 83.9442138671875, -20.86919403076172, 114.83319091796875, -39.636474609375, 8.399608612060547, 41.739654541015625, 27.617218017578125, 265.16156005859375, 200.349853515625, 84.8665771484375, 411.1964111328125, -4.572017669677734, -23.5201416015625, 160.59420776367188, 51.880462646484375, 52.57534408569336, 69.96771240234375, 137.41839599609375, 15.724334716796875, 96.65894317626953, 181.49468994140625, 75.23841094970703, -35.52537536621094, 6.224246978759766, -86.76910400390625, 112.80441284179688, 171.62307739257812, 49.39875793457031, 241.68743896484375, 34.478363037109375, 112.50265502929688, 25.73138427734375, 2.910491943359375, 187.62054443359375, 153.8658447265625, 27.217132568359375, 178.308349609375, -111.05941772460938, 17.59143829345703, -87.76622009277344, 66.70191955566406, 240.47923278808594, -174.53976440429688, 3.716339111328125, -0.809600830078125, -117.9114990234375, 208.59494018554688, 34.536407470703125, -4.23529052734375, 148.79925537109375, 158.12826538085938, 90.86126708984375, 96.8946533203125, 28.619239807128906, -10.92287826538086, 79.06182861328125, 189.33758544921875, 286.88336181640625, 17.361541748046875, 78.15263366699219, 145.01287841796875, 8.198219299316406, -140.45550537109375, -3.6690521240234375, 150.21263122558594, 53.95591735839844, 281.9414367675781, 38.855323791503906, 31.704002380371094, -59.87762451171875, 10.5634765625, -60.34427261352539, 95.70217895507812, 307.32623291015625, 121.10017395019531, 151.22610473632812, 175.46902465820312, 42.4344482421875, 118.66903686523438, 7.894105911254883, 54.60009765625, 4.416965484619141, -68.17625427246094, -63.266693115234375, -12.058807373046875, 1.4417724609375, 80.79197692871094, 15.772552490234375, -100.34327697753906, 206.1063995361328, 64.00431823730469, 27.564178466796875, -55.60398864746094, 124.66006469726562, 4.9914703369140625, 123.97062683105469, -64.53899383544922, 167.9141845703125, 0.0, -20.68682861328125, 58.23571014404297, -201.99911499023438, 10.7276611328125, -90.07080078125, 119.78341674804688, -158.156005859375, -8.011459350585938, 7.30499267578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000425.npy"}
{"epoch": 0.8900523560209425, "step": 426, "batch_size": 128, "mean": 67.76298522949219, "std": 96.05062103271484, "min": -248.59124755859375, "p10": -36.71165161132812, "median": 56.26972961425781, "p90": 210.05701751708983, "max": 312.7043151855469, "pos_frac": 0.75, "sample": [165.98223876953125, -8.588027954101562, 21.126840591430664, -36.03619384765625, 190.99835205078125, 79.27019500732422, 226.0631103515625, 10.911645889282227, 168.6710205078125, 58.7291259765625, 117.73640441894531, 0.27386474609375, 48.42144775390625, -62.9913330078125, -54.639007568359375, 89.80270385742188, 10.953765869140625, 144.07196044921875, 192.10470581054688, 55.9422607421875, 80.8812255859375, 61.7850341796875, -13.883102416992188, -26.05079460144043, 2.8990707397460938, 75.79768371582031, 154.81808471679688, 43.386985778808594, -43.46112060546875, -90.8058853149414, -10.211563110351562, 73.36329650878906, 74.46762084960938, -11.173080444335938, 145.506591796875, -0.06437301635742188, 61.414398193359375, 168.68081665039062, 191.79397583007812, 146.76040649414062, -122.21441650390625, 59.774810791015625, 206.97906494140625, 36.22679138183594, 79.86272430419922, 11.115631103515625, 104.35286712646484, 214.8873291015625, 48.35395050048828, -0.7501220703125, 101.37115478515625, 75.41427612304688, 133.7884521484375, 65.633544921875, 0.0, 9.660964965820312, 226.80978393554688, 27.907562255859375, 47.23583984375, 49.18175506591797, -62.80706787109375, 212.127685546875, 169.86953735351562, 231.7642822265625, -31.1663818359375, -21.65863800048828, -248.59124755859375, -38.2877197265625, 56.303192138671875, 210.4779510498047, 3.913055419921875, -110.05989837646484, 14.352638244628906, -9.937255859375, 167.37962341308594, 206.11126708984375, -103.16036987304688, 28.488561630249023, 273.40142822265625, 101.05500793457031, -27.510696411132812, 98.94497680664062, 19.292163848876953, 259.79327392578125, -26.776580810546875, 312.7043151855469, 66.65078735351562, 0.9300384521484375, 54.86518096923828, 209.87661743164062, 54.251708984375, 49.9130859375, 18.52710723876953, -20.961713790893555, -51.350555419921875, 15.562774658203125, -31.001617431640625, 138.33338928222656, 168.42771911621094, 67.17152404785156, 222.42510986328125, 183.29281616210938, 14.199462890625, 148.57583618164062, 56.23626708984375, 228.77197265625, 70.72074890136719, 6.616291046142578, 78.48634338378906, 105.38922119140625, -49.831268310546875, -56.282630920410156, -11.088493347167969, 74.19166564941406, 7.2148590087890625, 55.11390686035156, 61.9561767578125, 77.24482727050781, 163.32598876953125, 41.45562744140625, -17.45113754272461, -22.654998779296875, 228.03988647460938, 175.7161865234375, 169.87387084960938, 78.3605728149414, 49.7733154296875, 226.4681396484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000426.npy"}
{"epoch": 0.8921465968586387, "step": 427, "batch_size": 128, "mean": 86.9222183227539, "std": 108.06788635253906, "min": -257.746337890625, "p10": -29.78341827392578, "median": 71.90339660644531, "p90": 234.21278381347656, "max": 307.0994567871094, "pos_frac": 0.796875, "sample": [16.84869384765625, 234.17108154296875, 35.060821533203125, -19.599365234375, -43.5142822265625, 53.99127197265625, 36.379241943359375, 47.297393798828125, 68.33737182617188, -1.6649551391601562, 162.70965576171875, 251.43521118164062, -23.673690795898438, 175.5714569091797, 116.16458129882812, -62.98939514160156, 48.792236328125, 66.731689453125, 34.081878662109375, 52.64512634277344, 147.59930419921875, 218.76226806640625, 20.145339965820312, 18.583324432373047, 252.34521484375, -129.85888671875, 167.0364990234375, -68.32937622070312, 82.86564636230469, 32.24285888671875, 52.052024841308594, -27.05328369140625, 70.01422119140625, 81.09947204589844, 75.60124206542969, 182.0933837890625, 104.96210479736328, 19.77045249938965, -19.11956787109375, -130.1753387451172, 214.54049682617188, 41.54191589355469, 1.1123199462890625, 19.72442626953125, 36.64244079589844, 199.8973388671875, 234.31008911132812, 306.96435546875, 147.5730438232422, -123.52769470214844, -257.746337890625, 55.37762451171875, 47.66441345214844, 106.26055908203125, 48.06428527832031, 12.183151245117188, 36.54966735839844, 191.05233764648438, 187.59890747070312, 109.68180847167969, 195.69613647460938, 69.59785461425781, 8.794891357421875, -7.689208984375, 183.03314208984375, 111.51570129394531, 87.13592529296875, 93.38075256347656, 222.84727478027344, 12.9410400390625, -119.55949401855469, 60.17822265625, 268.7880859375, 18.55279541015625, 220.50149536132812, 183.8321533203125, 180.88229370117188, -35.387203216552734, 8.87701416015625, -28.694107055664062, -32.986114501953125, -122.96533203125, 276.707763671875, 17.94782829284668, 140.51052856445312, 232.25250244140625, 17.522308349609375, 73.64459228515625, 173.2418212890625, 139.572265625, 134.1961669921875, 167.26678466796875, 255.89230346679688, 92.94186401367188, -24.838912963867188, 6.405723571777344, 237.7896728515625, -10.892078399658203, 161.15994262695312, 161.57296752929688, -69.64413452148438, 77.05693054199219, -9.208690643310547, 307.0994567871094, 48.275394439697266, 99.49044799804688, 262.1463623046875, 189.63427734375, 140.38771057128906, 80.44683074951172, 189.63165283203125, -10.60516357421875, -31.584762573242188, 96.38789367675781, 189.27198791503906, 4.060676574707031, 122.90525817871094, 151.67462158203125, 204.851318359375, -22.222610473632812, -29.01141357421875, 70.16220092773438, 277.6192626953125, 212.5792236328125, 45.184600830078125, 144.20059204101562, 295.25140380859375, 242.95709228515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000427.npy"}
{"epoch": 0.8942408376963351, "step": 428, "batch_size": 128, "mean": 78.93218231201172, "std": 106.32775115966797, "min": -323.18408203125, "p10": -41.83524856567383, "median": 68.88507843017578, "p90": 213.9078506469726, "max": 363.531494140625, "pos_frac": 0.7734375, "sample": [56.93645095825195, -48.150543212890625, -42.250526428222656, 255.48858642578125, 8.049474716186523, 32.91957092285156, -6.717254638671875, 60.71478271484375, 35.14599609375, 6.5999755859375, 48.72235107421875, -31.126617431640625, 51.132843017578125, -13.351486206054688, 190.22093200683594, 169.1490478515625, 161.45230102539062, 13.11297607421875, 129.1640625, 88.70179748535156, -70.646484375, 151.39346313476562, 166.01089477539062, 2.628843307495117, 50.61531066894531, 22.482498168945312, 102.75200653076172, 96.91152954101562, 6.014955520629883, -13.043212890625, 193.31219482421875, 182.13348388671875, 254.85494995117188, -87.4000244140625, 79.9852294921875, 158.3353271484375, -13.035572052001953, 168.67837524414062, 62.192657470703125, 46.2613525390625, 115.03692626953125, 26.44793701171875, -153.02081298828125, 320.2139892578125, 174.64930725097656, 62.72059631347656, 58.870849609375, 28.20947265625, 179.85897827148438, 78.18914794921875, 30.507736206054688, 62.84959411621094, 20.87200927734375, 155.04373168945312, 227.49777221679688, 100.41973114013672, -66.46504211425781, -42.55950927734375, -15.733322143554688, 9.04608154296875, 43.24615478515625, 90.03408813476562, 138.96658325195312, -22.5794677734375, -2.77862548828125, -41.65727233886719, 363.531494140625, 42.5191650390625, 138.89910888671875, -6.881587982177734, -63.9686279296875, 78.08332061767578, 223.5602264404297, 209.7711181640625, 197.63165283203125, 122.44025421142578, 240.88058471679688, 47.1195068359375, -323.18408203125, 268.5066223144531, 36.95176696777344, 68.99198913574219, 142.5425567626953, 26.545379638671875, 186.25506591796875, 89.6893081665039, -3.0884857177734375, 121.19277954101562, -23.217041015625, 118.58604431152344, 311.4833984375, 254.0682373046875, -12.125869750976562, 24.1224365234375, 240.25482177734375, -4.450130462646484, 207.6148223876953, 118.021728515625, -8.9774169921875, 171.5677947998047, 161.41061401367188, 202.82077026367188, -157.67291259765625, 0.436859130859375, 128.2415771484375, 109.817138671875, 82.11422729492188, 237.43295288085938, -77.72321319580078, 204.9734344482422, 132.5655517578125, 147.3933868408203, 83.22077941894531, 11.483444213867188, 7.8422698974609375, 61.10992431640625, 12.440372467041016, 163.87496948242188, -3.982147216796875, 91.72288513183594, 86.27813720703125, 68.77816772460938, 193.43603515625, -126.21853637695312, -45.036285400390625, 71.00564575195312, 227.19805908203125, 119.17819213867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000428.npy"}
{"epoch": 0.8963350785340314, "step": 429, "batch_size": 128, "mean": 72.86721801757812, "std": 113.5780029296875, "min": -517.1026000976562, "p10": -47.67298431396484, "median": 65.19142150878906, "p90": 210.18731231689452, "max": 327.6539306640625, "pos_frac": 0.734375, "sample": [99.3211669921875, -212.2852783203125, -35.51959228515625, 87.36848449707031, 165.48410034179688, 28.97723388671875, 155.23519897460938, 150.94882202148438, 30.408700942993164, 270.9271240234375, -54.463531494140625, 59.57061767578125, 34.34831237792969, -47.435546875, 7.919498443603516, 155.9215087890625, -80.030029296875, -517.1026000976562, 177.9400634765625, 14.68536376953125, 15.509159088134766, -2.2513198852539062, 51.58245849609375, 53.390625, 1.5221691131591797, 0.0, 230.70687866210938, 168.28436279296875, -3.49285888671875, 135.87899780273438, 130.1234130859375, 83.2728271484375, 149.37094116210938, 171.2578887939453, 69.31774139404297, 41.1466064453125, -135.52481079101562, -2.843841552734375, 172.55702209472656, 327.6539306640625, 160.5895233154297, 99.43255615234375, -73.485107421875, 142.76376342773438, 181.6839599609375, 211.63107299804688, 58.68402099609375, 61.13116455078125, 122.07701110839844, 154.42996215820312, -116.2353515625, 55.86391067504883, -32.184234619140625, 9.3602294921875, -67.06988525390625, 41.21446228027344, 93.29782104492188, 96.9952392578125, 209.5685577392578, 85.66104125976562, 15.467090606689453, 39.471466064453125, 104.87008666992188, 216.4725341796875, 6.97088623046875, 184.708740234375, 7.937156677246094, -84.82211303710938, 116.5433349609375, 157.81239318847656, 21.456130981445312, -15.554267883300781, -23.06121063232422, 316.48529052734375, 124.62612915039062, 202.69631958007812, 310.3020324707031, -48.22700500488281, -55.66923522949219, 178.373046875, -29.00775146484375, -11.998672485351562, 217.68988037109375, -18.898025512695312, 60.6243896484375, 175.409912109375, 28.87872314453125, 236.5074462890625, -18.350250244140625, 14.612863540649414, 226.00364685058594, -35.302001953125, 44.05119323730469, 154.377197265625, -8.6630859375, 39.66583251953125, 69.25167846679688, 114.40005493164062, 138.86737060546875, 139.4761962890625, 91.82682800292969, -8.7254638671875, 189.53140258789062, 15.1220703125, 60.577301025390625, 73.4888916015625, -62.31884765625, 162.88851928710938, -3.7515869140625, 78.34075927734375, 121.90054321289062, 198.48358154296875, 217.80331420898438, -2.9619293212890625, 42.1627197265625, 126.1694564819336, -54.458099365234375, 99.86094665527344, 312.6617736816406, 224.39556884765625, 174.6288604736328, -15.061088562011719, -25.526809692382812, 157.81683349609375, 103.10498046875, 2.8663787841796875, 99.810546875, -13.181182861328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000429.npy"}
{"epoch": 0.8984293193717278, "step": 430, "batch_size": 128, "mean": 63.0407829284668, "std": 99.4947509765625, "min": -159.69827270507812, "p10": -58.81851806640625, "median": 52.31348419189453, "p90": 203.7016174316406, "max": 325.68170166015625, "pos_frac": 0.6953125, "sample": [-62.352874755859375, -88.18765258789062, -2.7166290283203125, 50.59385299682617, 150.383544921875, 51.3858642578125, 39.168701171875, 0.0, 325.68170166015625, 114.83976745605469, 5.185306549072266, 142.77919006347656, -41.662017822265625, 218.54345703125, 74.7105941772461, 20.028564453125, -16.41900634765625, 145.9776611328125, 156.52352905273438, 148.02096557617188, 110.33697509765625, 115.86248779296875, 10.294244766235352, 5.0341796875, 182.9818115234375, 48.10443115234375, -10.709510803222656, 150.40032958984375, 184.2650146484375, -23.4967041015625, 79.392333984375, 80.0211181640625, 136.85617065429688, 78.31329345703125, -61.390411376953125, 106.25106811523438, 11.553573608398438, 10.776626586914062, 31.5931396484375, 151.20431518554688, 52.95359802246094, -12.160781860351562, 161.45877075195312, 127.73056030273438, 267.8664245605469, -4.8533935546875, 135.91461181640625, 142.53756713867188, 30.935548782348633, 202.825439453125, 60.44415283203125, 80.24789428710938, 205.74603271484375, 143.0859375, 107.81195068359375, -145.0892333984375, -74.1786117553711, 27.153778076171875, -53.326568603515625, 245.265625, 3.324859619140625, 72.65032958984375, 23.24560546875, -70.18342590332031, -14.366485595703125, -2.9495086669921875, 75.32574462890625, 3.0164566040039062, -65.42813110351562, 230.60165405273438, 39.310455322265625, 73.6239013671875, 61.5472412109375, 39.24729919433594, -43.2974853515625, 0.0, 66.90934753417969, -57.716278076171875, 228.77227783203125, -74.62576293945312, 217.50531005859375, 241.59877014160156, 4.7186279296875, -2.0065383911132812, -49.4996337890625, 117.30451965332031, 125.87479400634766, 64.53429412841797, 0.0, -18.746826171875, 36.85999298095703, 60.097747802734375, -32.89154052734375, -113.904296875, 89.01214599609375, 193.1241912841797, 70.72264099121094, -120.54195404052734, 51.673370361328125, -101.12139892578125, 188.67327880859375, 46.163299560546875, 16.18433380126953, 129.2118682861328, 229.89691162109375, -39.244384765625, 96.56680297851562, -18.98297119140625, 251.2293243408203, 247.3872833251953, 187.57965087890625, 198.21832275390625, -4.21832275390625, 75.13272094726562, -25.746187210083008, 40.253753662109375, 109.18621826171875, 116.71661376953125, -159.69827270507812, -49.46233367919922, 71.481201171875, -13.14886474609375, 303.971435546875, 0.0002002716064453125, 123.99597930908203, -0.48321533203125, -99.3590087890625, 91.89395141601562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000430.npy"}
{"epoch": 0.900523560209424, "step": 431, "batch_size": 128, "mean": 53.12904357910156, "std": 95.34056854248047, "min": -147.27749633789062, "p10": -46.517053222656244, "median": 29.435028076171875, "p90": 187.84557800292967, "max": 341.846923828125, "pos_frac": 0.7265625, "sample": [-68.92597961425781, -83.52178955078125, 136.56149291992188, 52.467529296875, -21.656539916992188, -41.614418029785156, -90.77384948730469, 259.4989013671875, -98.01190185546875, 185.9627685546875, 154.98910522460938, 19.01012420654297, -8.730121612548828, -147.27749633789062, -41.80377197265625, 38.53216552734375, 29.4454345703125, 29.180328369140625, 49.0606689453125, 87.53082275390625, 26.525848388671875, 175.04739379882812, 23.739534378051758, 17.979736328125, 194.24560546875, 118.58358764648438, 0.1013946533203125, -83.7198486328125, 153.90484619140625, 196.050048828125, 27.9752197265625, 80.332275390625, 2.95977783203125, 103.3714599609375, 6.3603057861328125, 151.8447265625, -44.34303283691406, -26.50244140625, -45.246612548828125, 78.8515625, -36.78912353515625, 147.17584228515625, 48.31233215332031, -115.51231384277344, 6.7315216064453125, 118.12945556640625, -7.6947784423828125, 46.23858642578125, 2.0544204711914062, -115.65737915039062, 175.853271484375, 11.050762176513672, 113.26322937011719, 114.58968353271484, -2.931884765625, 40.8060302734375, 9.892440795898438, -23.182830810546875, 80.96646881103516, 29.3795166015625, 7.6518096923828125, -3.442535400390625, -16.77625274658203, 94.87911987304688, -11.972686767578125, -39.41845703125, 29.42462158203125, 7.110992431640625, -0.9653244018554688, -115.28314208984375, 3.7086639404296875, 34.61181640625, 64.82962036132812, 108.65097045898438, 28.0416259765625, -118.74151611328125, 65.62960815429688, 156.04205322265625, 123.30145263671875, -14.678558349609375, 62.80047607421875, 232.02325439453125, 49.27667236328125, 224.98745727539062, 49.34326171875, 13.370800018310547, 93.29594421386719, 27.874099731445312, -49.481414794921875, 21.518753051757812, 234.4173583984375, 21.963703155517578, -74.84385681152344, 8.657440185546875, 7.245424270629883, 105.39556884765625, 40.66796875, 52.69927978515625, -120.29376220703125, 144.7854766845703, 146.33375549316406, 184.1766357421875, 165.49688720703125, -18.395355224609375, 240.45237731933594, 276.907470703125, 107.92886352539062, 111.16912841796875, -4.69232177734375, 36.92615509033203, 33.609130859375, 66.20697021484375, 58.143096923828125, 192.23880004882812, 252.77603149414062, 218.9578399658203, 23.53555679321289, -20.656173706054688, 16.099288940429688, -4.44464111328125, 163.89532470703125, -9.416793823242188, 4.919013977050781, 271.76568603515625, 23.993377685546875, 341.846923828125, 74.88543701171875, 56.89106750488281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000431.npy"}
{"epoch": 0.9026178010471204, "step": 432, "batch_size": 128, "mean": 69.2044906616211, "std": 103.2371597290039, "min": -180.40240478515625, "p10": -27.7110445022583, "median": 48.37811279296875, "p90": 194.53856811523437, "max": 402.9842529296875, "pos_frac": 0.734375, "sample": [-0.65338134765625, 78.100341796875, 54.8260498046875, 256.25396728515625, 169.37228393554688, 223.69595336914062, -80.3123779296875, -15.912734985351562, 55.57611083984375, 155.40846252441406, 106.52012634277344, 25.19684600830078, -139.47991943359375, 5.1728515625, 117.91114807128906, 28.202003479003906, 112.35382080078125, -31.86382293701172, 170.82003784179688, 248.58531188964844, -69.59561157226562, 24.7161865234375, -10.039276123046875, 63.377593994140625, 31.74493408203125, 33.09800720214844, 234.461669921875, -20.191986083984375, 66.23434448242188, 300.71588134765625, -24.427316665649414, 21.266876220703125, 193.47760009765625, 197.1998291015625, 130.09234619140625, 62.41133117675781, 146.24111938476562, -34.446014404296875, 99.61727905273438, 11.561187744140625, 73.31879425048828, -127.41189575195312, -7.7276611328125, -27.73714256286621, -15.764419555664062, 301.18798828125, -13.053115844726562, -6.76898193359375, 174.28240966796875, 45.005531311035156, -10.579658508300781, 46.776123046875, 65.34814453125, -15.653438568115234, -15.527528762817383, 90.105712890625, 104.92230224609375, 6.7301025390625, 98.69400024414062, 175.20005798339844, 166.99359130859375, 137.4144744873047, 11.307754516601562, 39.8052978515625, -165.2159423828125, -180.40240478515625, 402.9842529296875, 10.875167846679688, 70.90392303466797, 105.16891479492188, 3.7012939453125, 1.7191905975341797, -113.61029052734375, -5.463258743286133, -59.5478515625, 169.00213623046875, -3.1866836547851562, 95.25570678710938, 5.425025939941406, 17.014205932617188, 152.38861083984375, -9.955879211425781, 34.7828369140625, 268.15643310546875, 143.29075622558594, 33.83269500732422, 199.35946655273438, 100.91368103027344, -18.839752197265625, 191.0519561767578, 69.0087890625, -133.98495483398438, 99.31120300292969, 177.47779846191406, -8.217742919921875, 9.782447814941406, 197.01416015625, 12.9005126953125, 49.9801025390625, 15.184410095214844, 1.8027572631835938, 44.51182556152344, 99.90771484375, 92.92828369140625, -5.613655090332031, 51.24615478515625, 42.103759765625, 261.1146545410156, 18.028717041015625, 12.178298950195312, 187.24615478515625, -16.197052001953125, 135.81263732910156, 189.6697998046875, 173.45645141601562, 174.83697509765625, 184.49127197265625, 124.5626220703125, 14.05419921875, 173.36834716796875, 0.0, 109.084716796875, 153.360595703125, -35.39349365234375, 329.9720458984375, 98.5291748046875, 38.59311294555664, -27.699859619140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000432.npy"}
{"epoch": 0.9047120418848168, "step": 433, "batch_size": 128, "mean": 63.14564514160156, "std": 93.34461212158203, "min": -183.86114501953125, "p10": -47.380093002319335, "median": 56.339202880859375, "p90": 177.31327514648436, "max": 323.7015380859375, "pos_frac": 0.8125, "sample": [8.923479080200195, -177.25173950195312, -66.0869140625, 172.59585571289062, 135.148681640625, 105.1727294921875, 4.323516845703125, 166.03924560546875, -125.7874755859375, 174.93850708007812, 81.734375, 63.069122314453125, 127.45233154296875, 24.203266143798828, 99.0020980834961, 160.77059936523438, 172.1629180908203, 63.649749755859375, 146.28099060058594, -174.0787353515625, 129.42044067382812, 138.55970764160156, 108.44031524658203, -7.70416259765625, -54.995574951171875, 15.616455078125, 190.64083862304688, 38.380340576171875, 151.77218627929688, -34.88310241699219, 34.97998046875, 63.0565185546875, 28.641159057617188, 27.059722900390625, 105.43934631347656, 3.08453369140625, 103.69123840332031, 134.49200439453125, -83.22430419921875, 116.927734375, 71.48919677734375, 108.07522583007812, 116.72348022460938, -27.905838012695312, 111.9718246459961, 24.93438720703125, -18.612884521484375, -87.40359497070312, 83.27194213867188, 127.79747009277344, 3.340362548828125, 27.151939392089844, 115.10887145996094, -17.12873077392578, -1.4887313842773438, 323.7015380859375, 46.497406005859375, -164.7730712890625, 36.23100280761719, -20.29076385498047, 162.0183868408203, 3.4391708374023438, 64.87239837646484, 266.5988464355469, -19.35484504699707, 58.162322998046875, 99.900634765625, 68.79672241210938, -83.53985595703125, 26.048370361328125, 171.34359741210938, 237.42367553710938, 5.897026062011719, -48.233642578125, 184.47930908203125, 12.383464813232422, 118.56924438476562, 182.85440063476562, 35.04972457885742, 26.871828079223633, 91.12405395507812, 27.512855529785156, 44.33538818359375, 36.73539733886719, 25.792724609375, 27.1904296875, 252.814697265625, 195.87161254882812, 157.81130981445312, 44.508270263671875, 3.3604049682617188, 7.685142517089844, 117.35809326171875, 22.44635009765625, -183.86114501953125, 5.203208923339844, 154.85411071777344, 92.59561157226562, 72.76107788085938, 6.976806640625, 27.263336181640625, 213.3948974609375, 71.115478515625, -47.014286041259766, 240.20144653320312, 56.23443603515625, 152.7225341796875, 72.5733871459961, -38.257568359375, 35.45396423339844, 46.78540802001953, 76.5272216796875, 136.5738525390625, 90.03617095947266, 127.28270721435547, -69.8106689453125, 231.62753295898438, 19.371280670166016, 196.41915893554688, 56.4439697265625, -60.63665771484375, -0.17299270629882812, 52.69902038574219, 15.453758239746094, 108.96003723144531, 3.696197509765625, 239.07545471191406, 21.617267608642578], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000433.npy"}
{"epoch": 0.9068062827225131, "step": 434, "batch_size": 128, "mean": 63.729976654052734, "std": 95.30543518066406, "min": -158.01898193359375, "p10": -35.11906967163086, "median": 58.971435546875, "p90": 193.6848205566406, "max": 376.57403564453125, "pos_frac": 0.7109375, "sample": [101.46176147460938, 27.5302734375, -6.6580810546875, -5.259735107421875, -18.6944580078125, 49.142669677734375, 144.36639404296875, 54.52239990234375, 58.734771728515625, 138.6844482421875, -32.154205322265625, -5.672454833984375, 26.454559326171875, 58.330963134765625, 71.50204467773438, 205.1870574951172, -22.053558349609375, 59.570556640625, -1.8247222900390625, 109.12039184570312, -26.9698486328125, 376.57403564453125, 211.75497436523438, -28.07259750366211, 126.738525390625, -107.73068237304688, 99.18637084960938, 187.13693237304688, 0.0, 152.14459228515625, 16.016830444335938, -16.641571044921875, 142.65505981445312, 66.9517822265625, 59.25458526611328, 100.5234603881836, 29.37109375, 69.10345458984375, 15.359039306640625, 181.56402587890625, 147.48696899414062, 333.6165771484375, -7.97393798828125, 261.4437255859375, 67.8019790649414, 161.77587890625, 78.80244445800781, 115.98847961425781, 81.014404296875, -36.40876007080078, 202.19483947753906, 67.25910949707031, 109.76345825195312, -9.823760986328125, 15.28148078918457, 119.9132080078125, 89.58441162109375, 101.14013671875, 179.12750244140625, -157.490478515625, 29.204315185546875, 111.0130615234375, 48.583892822265625, -0.149169921875, 35.19647216796875, 177.96435546875, -31.41295623779297, 123.48469543457031, 3.4720458984375, 197.740966796875, 167.95843505859375, -34.56634521484375, 191.94647216796875, -158.01898193359375, 0.0, 55.864501953125, 67.730224609375, 12.031524658203125, 30.6158447265625, 136.83895874023438, -40.843223571777344, 178.1793212890625, -80.1636962890625, 76.32396697998047, -0.5922889709472656, 32.03717041015625, 2.19549560546875, -23.256210327148438, 72.52572631835938, -39.8385009765625, 106.93803405761719, -29.524200439453125, -129.99766540527344, 49.813018798828125, -2.0191650390625, -3.464141845703125, 3.1103515625, 0.5858154296875, -126.76732635498047, 18.09881591796875, 25.288314819335938, -47.63970947265625, 96.34658813476562, 200.31417846679688, -42.68083190917969, 99.8466796875, 246.5277099609375, 158.13623046875, 78.1038818359375, 68.22543334960938, 210.421875, -18.16201400756836, 89.77224731445312, 231.44573974609375, 203.36459350585938, 59.208099365234375, 153.57501220703125, 80.06329345703125, 44.85626220703125, 16.590749740600586, -133.24832153320312, 0.0, 94.23403930664062, 43.60881042480469, -79.73214721679688, 223.725830078125, 81.58878326416016, 157.10714721679688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000434.npy"}
{"epoch": 0.9089005235602095, "step": 435, "batch_size": 128, "mean": 68.8076400756836, "std": 96.84156036376953, "min": -193.63775634765625, "p10": -36.03787384033203, "median": 55.83588409423828, "p90": 178.53321838378903, "max": 518.3612060546875, "pos_frac": 0.75, "sample": [160.0225067138672, 36.462158203125, -3.996795654296875, 91.09098815917969, 148.460205078125, 113.4559097290039, -44.549598693847656, 246.9007568359375, 70.64471435546875, 92.33224487304688, 275.6809387207031, 98.0924072265625, 131.58602905273438, 71.45584106445312, 155.01364135742188, 384.2167053222656, 37.368186950683594, 0.25311279296875, 138.53201293945312, 159.4833984375, 88.04452514648438, -14.51275634765625, -53.409095764160156, 160.30230712890625, -60.486083984375, 109.31243896484375, 227.685546875, 142.41600036621094, -10.0972900390625, 36.88426971435547, 4.3353271484375, 236.51806640625, 75.08428955078125, 232.2321319580078, 199.43255615234375, -36.83433532714844, 149.73202514648438, 155.20498657226562, 94.75588989257812, -11.903350830078125, 37.85345458984375, 13.483762741088867, 118.39743041992188, 0.0, 100.86672973632812, 104.485595703125, 11.306873321533203, 71.54307556152344, -10.811334609985352, 63.87257385253906, 42.3262939453125, 22.10009765625, 15.314407348632812, 176.14547729492188, 7.846656799316406, -23.2141170501709, -35.696533203125, 20.667144775390625, 175.52960205078125, -1.9985675811767578, 10.511749267578125, -2.47283935546875, 43.7987060546875, -16.0699462890625, 79.96440124511719, -7.25872802734375, -15.11948013305664, -3.6153640747070312, 187.94937133789062, 173.05325317382812, 203.8734130859375, 41.363037109375, 15.235971450805664, -28.00640869140625, 64.14431762695312, 45.197608947753906, 54.234649658203125, 38.963348388671875, 184.1046142578125, -68.80047607421875, 18.665283203125, 243.02825927734375, 203.86453247070312, 61.168609619140625, 136.95379638671875, -73.13189697265625, -32.799774169921875, 67.73321533203125, -7.375801086425781, 105.4490966796875, -29.836170196533203, 103.24496459960938, 69.25933837890625, 57.94122314453125, 25.987648010253906, -55.17521667480469, -193.63775634765625, 105.05426788330078, 28.70726776123047, 17.848419189453125, 153.0631561279297, 19.94403076171875, 121.67599487304688, -37.514434814453125, 111.9764404296875, 5.444511413574219, 53.79939270019531, 172.8312225341797, -23.886428833007812, 103.82147216796875, 21.237720489501953, -101.93431091308594, 145.6515655517578, 171.61962890625, -51.650665283203125, 62.83111572265625, 57.43711853027344, 17.467918395996094, -37.22007751464844, 94.67539978027344, 122.63299560546875, 518.3612060546875, 102.34829711914062, 28.081527709960938, -43.9954833984375, 143.49827575683594, 4.28936767578125, 19.66888427734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000435.npy"}
{"epoch": 0.9109947643979057, "step": 436, "batch_size": 128, "mean": 55.97534942626953, "std": 103.63855743408203, "min": -199.63189697265625, "p10": -63.937643432617186, "median": 39.23704528808594, "p90": 186.8712158203125, "max": 448.082275390625, "pos_frac": 0.734375, "sample": [41.015594482421875, -92.4962158203125, 167.84786987304688, 79.03327941894531, -58.70184326171875, 26.35009765625, 137.90293884277344, 109.11096954345703, 53.62530517578125, -43.65904235839844, -161.50906372070312, -199.63189697265625, 41.18695068359375, 175.25418090820312, 156.44537353515625, -62.719268798828125, 146.10708618164062, 217.4155731201172, -13.198577880859375, -93.91873168945312, 253.2913818359375, 0.27142333984375, 34.21628952026367, -96.65054321289062, 190.10205078125, 39.968780517578125, 25.188018798828125, -14.107627868652344, 12.905731201171875, 216.12600708007812, -41.54693603515625, 158.0869598388672, -62.065189361572266, -2.550323486328125, 148.2134552001953, 17.90277862548828, 22.7864990234375, 79.79894256591797, 185.486572265625, 27.1932373046875, 27.550933837890625, 42.95555877685547, 82.37538146972656, 150.80633544921875, -40.26336669921875, -39.71160888671875, 58.226470947265625, 12.594200134277344, 29.229644775390625, 236.57510375976562, -6.694549560546875, 260.02154541015625, 23.849761962890625, -122.98785400390625, 66.62727355957031, 58.21527099609375, 111.19970703125, 128.58099365234375, 121.64458465576172, 11.115989685058594, 17.624053955078125, 223.26992797851562, 4.745597839355469, 107.93499755859375, -66.780517578125, 448.082275390625, 0.09222412109375, 126.08267211914062, 133.3681182861328, -1.3734893798828125, 91.68899536132812, 96.44833374023438, 250.4788818359375, -38.98756408691406, 133.00115966796875, -16.661651611328125, 150.40243530273438, -117.96994018554688, -57.168701171875, 229.63656616210938, 104.30841064453125, -5.94329833984375, 41.977020263671875, 87.75588989257812, 52.67439270019531, 45.50079345703125, -78.11715698242188, 112.4935302734375, 92.48347473144531, -84.537109375, 320.1797790527344, 28.581398010253906, -1.99652099609375, 53.570220947265625, -33.170562744140625, 2.0162353515625, 31.2061767578125, 58.05694580078125, 14.782928466796875, 258.5028381347656, 89.525390625, 38.179443359375, 13.018112182617188, -109.33316040039062, 170.06094360351562, 36.40374755859375, 95.27562713623047, 32.53477478027344, 184.21444702148438, 58.682029724121094, 62.24037170410156, -9.64455795288086, 24.771228790283203, 44.58230209350586, 38.50531005859375, 7.9460296630859375, 291.71221923828125, 5.327629089355469, -15.604034423828125, 109.06873321533203, -4.491926193237305, -139.05380249023438, 6.376800537109375, 136.3934326171875, -73.289306640625, 116.6888427734375, 21.886871337890625, 86.63818359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000436.npy"}
{"epoch": 0.9130890052356021, "step": 437, "batch_size": 128, "mean": 75.65726470947266, "std": 105.5650634765625, "min": -150.17938232421875, "p10": -53.75929718017577, "median": 71.79927825927734, "p90": 233.84460906982423, "max": 320.0421142578125, "pos_frac": 0.734375, "sample": [238.3792724609375, 132.88568115234375, 134.74655151367188, 89.7958984375, 25.13623046875, 42.901885986328125, 74.84609985351562, 47.08369064331055, -22.3037109375, 8.1229248046875, 60.0316162109375, 116.547119140625, 108.7862548828125, 140.54412841796875, 131.31744384765625, 181.02279663085938, 216.01724243164062, -50.42521667480469, 4.8048858642578125, 116.162109375, 68.54733276367188, 96.43218231201172, 178.05126953125, 26.827903747558594, 157.55128479003906, -48.80558776855469, -79.53431701660156, 81.37779998779297, 115.78230285644531, -108.5799560546875, 52.481842041015625, 233.8083038330078, -125.10284423828125, 292.35986328125, -15.20819091796875, 30.526260375976562, -31.53534698486328, 92.94992065429688, -7.9721832275390625, 44.90252685546875, -0.430267333984375, 273.321533203125, 316.0984802246094, 91.36883544921875, -40.99304962158203, 233.9293212890625, 100.80488586425781, 16.163177490234375, -90.01309204101562, 21.58049774169922, 57.46392822265625, 2.46868896484375, 224.4181365966797, 202.29669189453125, 118.74258422851562, 243.82371520996094, 205.95733642578125, 114.83157348632812, -100.24435424804688, 73.02610778808594, 57.749664306640625, 114.19621276855469, 58.457244873046875, 275.9954833984375, 128.55322265625, 70.57244873046875, 260.81756591796875, 52.20173645019531, 203.206787109375, 119.12347412109375, 133.53207397460938, 277.5967102050781, 35.20335388183594, 132.12106323242188, -11.939701080322266, 58.33001708984375, -67.55984497070312, -14.150558471679688, 320.0421142578125, 168.47329711914062, -150.17938232421875, 95.93588256835938, -61.538818359375, 284.26934814453125, 87.3622817993164, -0.2011566162109375, 133.64614868164062, -116.58193969726562, -11.96380615234375, 106.72991943359375, 237.0687255859375, 91.02505493164062, 12.432571411132812, -25.87896728515625, 147.9453125, 144.436279296875, -49.17408752441406, 96.17343139648438, 37.03668212890625, 107.65179443359375, -105.99337768554688, 13.922119140625, 66.00137329101562, -13.687255859375, 147.69772338867188, 36.21543884277344, -39.52874755859375, 95.25128173828125, 99.3083267211914, 34.5225830078125, 109.69035339355469, 131.62384033203125, -17.819726943969727, 129.64556884765625, -90.73080444335938, 0.0, 307.623291015625, -98.79054260253906, 43.482879638671875, -80.89825439453125, 1.1923885345458984, 90.43594360351562, -21.41241455078125, -13.46966552734375, 0.0, 36.3404541015625, 147.33212280273438, 221.57904052734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000437.npy"}
{"epoch": 0.9151832460732985, "step": 438, "batch_size": 128, "mean": 40.41261672973633, "std": 101.60514831542969, "min": -279.1845703125, "p10": -84.61525268554688, "median": 30.671993255615234, "p90": 172.56863403320312, "max": 314.3106384277344, "pos_frac": 0.65625, "sample": [81.55988311767578, -209.27566528320312, 87.886474609375, -12.7093505859375, 111.15826416015625, 17.019317626953125, 82.84808349609375, 314.3106384277344, 46.39923095703125, 38.14501953125, 43.21601104736328, -17.26653289794922, -92.85440063476562, -35.1190185546875, -18.37542724609375, -134.14114379882812, 45.24238586425781, -12.765155792236328, 69.0833740234375, 33.80773162841797, 9.2493896484375, 211.9254150390625, 244.115966796875, 55.657318115234375, -144.64389038085938, 175.484619140625, -34.00757598876953, -2.359283447265625, -61.678123474121094, 172.2106170654297, 0.0, 4.839111328125, -47.15816116333008, 146.76052856445312, 131.95486450195312, 62.48448181152344, -100.77772521972656, -83.70428466796875, 28.214508056640625, -86.7408447265625, -80.47122192382812, 16.4276123046875, -71.24397277832031, -0.03142547607421875, 4.235145568847656, 9.285812377929688, -40.4940185546875, 55.663612365722656, 74.65580749511719, -12.986480712890625, 221.07162475585938, -59.88804626464844, 111.81743621826172, -53.2271728515625, 115.4097900390625, 46.58433532714844, 144.001220703125, -88.22665405273438, 55.79814147949219, 193.410888671875, 36.705596923828125, 162.43170166015625, -69.70433044433594, 51.059356689453125, 0.0, 23.564937591552734, 304.7134704589844, -118.26776123046875, -143.03799438476562, -65.54029846191406, -3.7349853515625, -94.98274993896484, -2.648834228515625, 8.65582275390625, 35.7310791015625, 72.6668472290039, 29.0130615234375, 100.74822998046875, 133.71807861328125, 114.53038024902344, 187.3892822265625, -50.575965881347656, 158.28274536132812, 162.51901245117188, -5.404319763183594, -40.1458740234375, 16.210693359375, -1.68988037109375, -279.1845703125, 169.63438415527344, 104.90089416503906, 28.08672332763672, 116.3648681640625, -101.26402282714844, 1.4797000885009766, 115.31985473632812, 28.048057556152344, 94.78373718261719, 28.98370361328125, 44.69306945800781, 81.52169799804688, 64.140625, 0.1009063720703125, 38.87939453125, 17.2017822265625, 86.16357421875, 19.708515167236328, 267.5904846191406, -53.86113739013672, -3.8904266357421875, -9.227760314941406, -88.072998046875, 39.57135009765625, 32.33092498779297, 180.49166870117188, 129.19996643066406, -37.9306640625, 65.41683959960938, 200.96170043945312, 15.890817642211914, 60.39806365966797, 70.4195556640625, 144.54461669921875, 156.160888671875, 313.5308837890625, 28.382888793945312, 173.4040069580078, 63.9437255859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000438.npy"}
{"epoch": 0.9172774869109948, "step": 439, "batch_size": 128, "mean": 64.93077850341797, "std": 97.01757049560547, "min": -180.80032348632812, "p10": -48.22561416625976, "median": 54.190521240234375, "p90": 191.9055679321289, "max": 298.8651123046875, "pos_frac": 0.7265625, "sample": [-87.60089111328125, -86.87921142578125, -72.40655517578125, 268.64508056640625, -2.6654319763183594, 218.77786254882812, -3.198577880859375, 148.03073120117188, 12.022315979003906, 150.26597595214844, 16.017044067382812, 26.34588623046875, -5.95849609375, 13.5848388671875, -69.6094970703125, 41.48674011230469, 54.18890380859375, 196.35791015625, 68.07757568359375, 36.181976318359375, 225.942138671875, 119.94349670410156, 17.20220947265625, 9.34429931640625, -13.264884948730469, 102.07561492919922, 37.38798522949219, 68.930419921875, 162.4005126953125, 154.122802734375, 169.0214080810547, 0.0, -61.7467041015625, 82.54959106445312, 116.55427551269531, 116.75284576416016, 293.13299560546875, 62.179595947265625, 289.30389404296875, 54.192138671875, 160.9193115234375, 179.7476806640625, -1.49884033203125, 75.05801391601562, -113.29228210449219, 166.43882751464844, 119.86904907226562, 249.999755859375, -9.304033279418945, -11.77264404296875, 29.246856689453125, 179.95123291015625, 20.8521728515625, 82.94696044921875, -125.68002319335938, 73.19816589355469, 114.66964721679688, 42.113433837890625, 24.433929443359375, 7.280914306640625, -180.80032348632812, 165.41329956054688, 133.43212890625, 298.8651123046875, 142.85049438476562, 148.72288513183594, -5.818593978881836, 14.41162109375, -52.208770751953125, 10.82303237915039, 239.5671844482422, 209.22842407226562, 11.716644287109375, 74.16122436523438, 103.81491088867188, 190.77978515625, 54.46943664550781, -36.36577606201172, 23.650936126708984, 0.0, -25.396270751953125, -30.776092529296875, -46.51854705810547, 101.20386505126953, 96.73736572265625, 75.20791625976562, 97.44769287109375, 178.5260009765625, -142.8455352783203, 62.53729248046875, 123.13008117675781, -137.53912353515625, 187.15792846679688, 146.0711212158203, 37.98931884765625, -95.60566711425781, 37.02550506591797, 11.42462158203125, 165.92974853515625, -6.2052001953125, -60.7119140625, 194.5323944091797, 164.2884521484375, 126.212158203125, 39.132781982421875, 24.96994972229004, -14.748008728027344, 59.257904052734375, 39.813934326171875, 0.0, 2.5751953125, -2.060302734375, 27.895042419433594, 7.845245361328125, -23.820159912109375, -46.431610107421875, 130.47105407714844, 40.121337890625, 67.23455810546875, 106.47998046875, 0.0, 143.019775390625, 202.4803466796875, 56.195892333984375, 206.0645294189453, 0.0, 178.71771240234375, 66.52047729492188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000439.npy"}
{"epoch": 0.9193717277486911, "step": 440, "batch_size": 128, "mean": 51.0716667175293, "std": 89.7911376953125, "min": -193.6556396484375, "p10": -58.33069763183593, "median": 41.416996002197266, "p90": 170.52127075195312, "max": 284.26202392578125, "pos_frac": 0.7109375, "sample": [-89.39984130859375, 0.0, 108.8736572265625, -116.69369506835938, 12.357780456542969, -16.409912109375, -163.603515625, -9.8106689453125, 36.30331802368164, 98.35092163085938, 119.93827056884766, 91.48193359375, 38.62276077270508, 77.27249145507812, 104.00563049316406, -8.684249877929688, 223.62237548828125, 16.398193359375, 284.26202392578125, 97.85438537597656, 96.69288635253906, 26.065650939941406, 215.32723999023438, 140.26751708984375, 21.8341064453125, 84.26113891601562, 172.614990234375, 98.55575561523438, -7.3448486328125, 170.77281188964844, 51.70452880859375, 70.06080627441406, -29.9951114654541, 44.7672119140625, -34.541168212890625, 41.972068786621094, 146.81045532226562, -85.02301025390625, 0.0, 91.74725341796875, 54.80682373046875, 141.79031372070312, -18.985504150390625, 17.022796630859375, 111.11006164550781, -64.92922973632812, 44.895751953125, 171.23287963867188, 28.64764404296875, 21.67058563232422, 10.949922561645508, -2.9572601318359375, 42.877685546875, 40.86192321777344, 40.33525848388672, 65.45790100097656, 185.43865966796875, -76.66156005859375, 53.964752197265625, 126.23974609375, 121.79354858398438, 25.21038818359375, 132.7699737548828, 73.65383911132812, -76.62510681152344, 98.7177734375, 88.98431396484375, 6.92877197265625, -159.49151611328125, 20.0093994140625, 31.79742431640625, 24.73056411743164, 8.45355224609375, -24.021652221679688, -193.6556396484375, 118.76419067382812, -3.2463817596435547, -84.37042999267578, -56.4403076171875, 117.92202758789062, -0.33518218994140625, 82.65921020507812, 166.0736083984375, 32.64410400390625, 87.89117431640625, 36.8826904296875, 25.332489013671875, 238.21905517578125, 95.3265380859375, -16.48740577697754, 185.34832763671875, 3.7986488342285156, -12.794624328613281, 220.3347930908203, 89.09492492675781, 36.2955322265625, -38.2923583984375, 44.6546630859375, 150.37222290039062, 65.99136352539062, 179.88626098632812, 96.25669860839844, -62.741607666015625, -35.19927978515625, 6.31121826171875, 31.87701416015625, 234.05450439453125, 258.47467041015625, -40.944732666015625, 3.42034912109375, 96.63021087646484, -31.120849609375, -5.4568939208984375, -30.990825653076172, 116.67379760742188, 50.847869873046875, 57.491302490234375, 139.20562744140625, 114.10821533203125, 169.80291748046875, 170.41346740722656, -3.1238555908203125, 134.75308227539062, 11.650146484375, -73.99459838867188, -25.900405883789062, -162.18612670898438, 157.01922607421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000440.npy"}
{"epoch": 0.9214659685863874, "step": 441, "batch_size": 128, "mean": 76.48146057128906, "std": 90.27230834960938, "min": -132.59213256835938, "p10": -46.05006713867187, "median": 70.58531188964844, "p90": 191.22262878417968, "max": 316.7815856933594, "pos_frac": 0.8046875, "sample": [-21.894027709960938, 47.2103271484375, -57.907470703125, 159.3787841796875, 66.24412536621094, -82.86615753173828, 125.67294311523438, 180.44451904296875, 75.62338256835938, 176.46414184570312, 169.94357299804688, 101.28924560546875, 61.71636962890625, 143.2584228515625, 103.32100677490234, -15.189453125, 8.2030029296875, 0.0, 3.815826416015625, 33.105560302734375, 142.23301696777344, -53.212913513183594, 242.1185302734375, 35.622802734375, 26.6202392578125, 185.61614990234375, 125.86093139648438, -132.59213256835938, 223.77462768554688, 280.61639404296875, 195.48880004882812, 120.08547973632812, 6.570377349853516, -62.36116027832031, 190.660888671875, 84.19012451171875, 41.44561767578125, 214.90040588378906, 162.691650390625, -37.83336639404297, 66.92425537109375, 162.2484893798828, 159.13973999023438, 52.84100341796875, 69.43576049804688, 6.773406982421875, 7.841644287109375, 82.37130737304688, 57.790687561035156, 154.32302856445312, 165.64459228515625, -90.9157485961914, 132.0103759765625, -87.5159912109375, 193.43463134765625, 97.815673828125, 185.39517211914062, -12.85565185546875, 110.34445190429688, 111.31576538085938, 164.856689453125, 17.6732177734375, -44.23089599609375, 29.308837890625, 84.62519836425781, 26.58953857421875, -109.19927978515625, 316.7815856933594, 110.49166870117188, 96.63801574707031, -34.05165100097656, -50.2947998046875, 125.29290771484375, 42.984466552734375, 164.2666015625, 177.29054260253906, -3.95745849609375, 7.260650634765625, 55.93052673339844, -18.040924072265625, 8.108596801757812, 108.01073455810547, 0.0, 49.27641296386719, -83.54254150390625, -73.29336547851562, 192.53335571289062, 33.10114669799805, -79.19670104980469, 55.277435302734375, 21.178375244140625, 37.53497314453125, 206.22613525390625, 25.43206024169922, 111.51473999023438, 147.46710205078125, 156.18093872070312, 174.768798828125, 106.79339599609375, 22.464019775390625, 149.73468017578125, -0.6795005798339844, 79.4014892578125, 76.90126037597656, 78.40277099609375, 22.8521728515625, 166.19696044921875, 155.0047607421875, -41.224090576171875, 11.875442504882812, 53.982154846191406, 196.15670776367188, 66.65292358398438, 71.73486328125, 144.02882385253906, 210.10247802734375, 93.23826599121094, -55.56549072265625, 25.395431518554688, 86.60226440429688, 124.08740234375, 120.37887573242188, 40.35483932495117, 48.58198547363281, 280.7555236816406, 68.56428527832031, 207.6707763671875, 33.6939697265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000441.npy"}
{"epoch": 0.9235602094240838, "step": 442, "batch_size": 128, "mean": 55.670654296875, "std": 101.72242736816406, "min": -221.61407470703125, "p10": -64.43302307128906, "median": 45.14464569091797, "p90": 194.9277313232422, "max": 310.0013122558594, "pos_frac": 0.734375, "sample": [194.73480224609375, 28.721710205078125, -0.3468780517578125, -78.8290786743164, -10.537841796875, 218.84176635742188, 87.11972045898438, 139.21212768554688, 64.53634643554688, 32.08879089355469, 47.11468505859375, -36.506072998046875, 1.1468124389648438, -17.75634765625, 106.20210266113281, 72.384765625, -221.61407470703125, 201.06613159179688, 158.17364501953125, 9.73834228515625, 236.68405151367188, 150.3376007080078, 79.68666076660156, 220.131591796875, 45.68833923339844, 47.6368408203125, -5.804027557373047, 310.0013122558594, 3.122406005859375, 157.88552856445312, 18.824127197265625, 12.533279418945312, 177.61090087890625, 145.21792602539062, 7.455522537231445, -54.029144287109375, 7.5745391845703125, -59.622802734375, -101.659912109375, 40.40918731689453, 37.5626220703125, 35.583740234375, 32.84471130371094, 9.04812240600586, 62.887550354003906, 169.9088134765625, 102.4203109741211, 44.6009521484375, 87.34405517578125, 145.12197875976562, -11.453886032104492, -147.00970458984375, 175.22531127929688, 111.74871826171875, 195.37789916992188, 192.85638427734375, 189.4976806640625, 236.9691162109375, -66.80169677734375, -37.057342529296875, 7.980438232421875, 31.59686279296875, 90.33233642578125, -85.2298583984375, 174.84246826171875, 13.336944580078125, 80.59379577636719, 52.78855895996094, 88.84840393066406, -34.23202133178711, 45.943817138671875, 26.158279418945312, 149.71722412109375, 3.74700927734375, 95.80758666992188, -63.417877197265625, 123.30056762695312, 0.0, 43.30375671386719, -16.710739135742188, 201.21156311035156, 68.18075561523438, 183.69235229492188, 104.23214721679688, 33.70750427246094, 215.41363525390625, 5.3402099609375, -33.15803527832031, -35.25315856933594, 12.198348999023438, 70.5489501953125, 11.358673095703125, 84.51211547851562, 73.34989929199219, 127.17221069335938, 70.24407958984375, -30.9278564453125, -18.063430786132812, 165.39208984375, -61.98626708984375, 10.226806640625, -173.57855224609375, -7.89605712890625, 142.5906982421875, 240.74302673339844, 187.0159912109375, -67.95877075195312, -41.274993896484375, -105.71575927734375, 3.4511566162109375, -135.57875061035156, 115.81631469726562, 235.4434814453125, 14.3006591796875, 56.03826904296875, 5.7074432373046875, -115.3746337890625, 205.32098388671875, 141.63885498046875, 85.65216064453125, -186.45748901367188, 232.31866455078125, -57.05787658691406, 140.7601318359375, -109.91793823242188, 20.2108154296875, 85.98976135253906, 125.70524597167969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000442.npy"}
{"epoch": 0.9256544502617801, "step": 443, "batch_size": 128, "mean": 64.28445434570312, "std": 96.59688568115234, "min": -202.80386352539062, "p10": -38.25665893554685, "median": 56.201297760009766, "p90": 203.15204467773438, "max": 326.55145263671875, "pos_frac": 0.7890625, "sample": [55.7122802734375, 29.9857177734375, 94.12205505371094, 94.56813049316406, -150.86935424804688, 19.605548858642578, -31.200927734375, 49.3924560546875, -8.285186767578125, 56.69031524658203, 119.79420471191406, 29.807449340820312, 74.945556640625, -55.07194519042969, 94.9219970703125, -172.89907836914062, 105.36343383789062, 84.03240203857422, 19.318252563476562, -30.24688720703125, 67.55865478515625, 326.55145263671875, 154.75465393066406, 83.49697875976562, 85.85394287109375, 79.37530517578125, 150.10943603515625, -2.1749000549316406, -54.72003173828125, 96.4405746459961, 25.367462158203125, -22.726478576660156, -18.59521484375, 135.94305419921875, 213.56716918945312, 4.65447998046875, 132.3818359375, 8.560165405273438, 96.06838989257812, 5.598201751708984, 152.3216552734375, 160.3033447265625, 42.38520812988281, 65.65087890625, -1.3985443115234375, 75.341552734375, 123.30691528320312, 168.17953491210938, 119.49623107910156, 199.1988525390625, 14.448745727539062, 49.42793273925781, 87.51817321777344, 33.17015075683594, -160.8165283203125, 59.12823486328125, 106.53494262695312, -122.93402862548828, 140.71278381347656, -27.792999267578125, 39.5367431640625, 188.2737274169922, 49.5476188659668, -202.80386352539062, 212.590087890625, -71.08843994140625, -15.189590454101562, 72.0999755859375, 178.21185302734375, -90.83560180664062, 13.474014282226562, 30.39373779296875, 42.242401123046875, 57.83021545410156, 116.7342529296875, 185.2661895751953, 70.82707214355469, 268.0064392089844, 23.03907012939453, 204.65475463867188, 121.13092041015625, 41.25254821777344, 296.22454833984375, 75.19293212890625, 118.37301635742188, -12.96356201171875, 30.851776123046875, 45.823150634765625, 45.436309814453125, 210.74105834960938, 213.701904296875, 207.45040893554688, 172.9517822265625, 202.50802612304688, 249.30111694335938, -15.527511596679688, 16.65831756591797, 147.07171630859375, -77.08334350585938, 88.51045989990234, 69.09500122070312, 106.63053894042969, 255.91653442382812, -8.218551635742188, 81.36990356445312, 12.846809387207031, -93.9166259765625, 21.72662353515625, 90.30169677734375, 213.78298950195312, 26.232284545898438, 19.296859741210938, -20.533477783203125, 142.33013916015625, -106.12557983398438, 14.005401611328125, 51.370025634765625, 16.518600463867188, 102.627685546875, 42.76319885253906, -31.147476196289062, 214.7700653076172, 30.024627685546875, 129.6183319091797, 28.757110595703125, -111.90863037109375, 18.102508544921875, 29.82275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000443.npy"}
{"epoch": 0.9277486910994764, "step": 444, "batch_size": 128, "mean": 70.05929565429688, "std": 111.43712615966797, "min": -257.2200927734375, "p10": -54.157631301879874, "median": 59.33930969238281, "p90": 211.50189514160155, "max": 406.5047607421875, "pos_frac": 0.7265625, "sample": [-14.356101989746094, 111.23526763916016, 130.65802001953125, -1.6912841796875, 177.16171264648438, -36.34471130371094, 337.04840087890625, -88.56137084960938, 173.65707397460938, 157.7773895263672, -37.05023193359375, 30.341026306152344, -16.0523681640625, -82.31765747070312, 215.8153076171875, 51.418731689453125, 51.34563446044922, -58.6268310546875, 108.62966918945312, 185.28843688964844, -10.9710693359375, 66.44418334960938, 21.126983642578125, -51.8292236328125, 406.5047607421875, -7.042198181152344, 3.9213027954101562, 254.55728149414062, 19.81622314453125, 158.94110107421875, 10.53802490234375, 46.294471740722656, 183.89178466796875, 134.3246307373047, -186.92538452148438, 16.1568603515625, -46.77980041503906, 94.5684814453125, 217.0299072265625, 183.81900024414062, 109.67060852050781, 28.646209716796875, 147.63336181640625, -41.11395263671875, 43.78594970703125, 241.75527954101562, -7.372699737548828, 185.11920166015625, 81.69123840332031, 320.87579345703125, 159.3111572265625, 117.45906829833984, 6.3956298828125, 33.11449432373047, 209.65328979492188, 12.966728210449219, 34.671241760253906, 222.14776611328125, 68.85433197021484, -86.297607421875, 187.68499755859375, 234.37942504882812, -50.425018310546875, 112.4136734008789, 43.470008850097656, 32.23785400390625, -22.881622314453125, 76.60113525390625, 61.602081298828125, 145.43878173828125, -15.24481201171875, -67.81881713867188, 16.66949462890625, -64.27072143554688, 147.5350341796875, 20.068798065185547, 244.36038208007812, 272.8294677734375, -6.0086669921875, 82.11517333984375, -66.9862060546875, 150.72210693359375, 95.7978515625, 106.03643798828125, -41.12605285644531, 113.83367919921875, 14.305944442749023, 161.04371643066406, 181.5519561767578, -138.33477783203125, -47.49920654296875, 200.06320190429688, -10.448331832885742, 26.400741577148438, -35.377105712890625, -19.45643424987793, -83.14016723632812, 149.51663208007812, 100.89279174804688, 72.89068603515625, -70.833740234375, 149.959716796875, 13.4234619140625, 67.5810317993164, 11.8709716796875, 11.716522216796875, 60.49591064453125, 148.73487854003906, 8.823114395141602, 260.36566162109375, 133.01315307617188, -22.27777099609375, 133.64120483398438, 173.38693237304688, 204.57095336914062, 67.2445068359375, 58.182708740234375, 294.76153564453125, -52.24225997924805, 143.65426635742188, 73.37175750732422, 62.14030456542969, 31.200286865234375, 41.95408630371094, -177.93258666992188, 46.3570556640625, 171.4709014892578, -257.2200927734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000444.npy"}
{"epoch": 0.9298429319371728, "step": 445, "batch_size": 128, "mean": 57.31190490722656, "std": 104.9087142944336, "min": -201.28836059570312, "p10": -66.39955139160155, "median": 44.060895919799805, "p90": 197.38431091308593, "max": 494.9361267089844, "pos_frac": 0.7265625, "sample": [35.937705993652344, -201.28836059570312, 164.59239196777344, 63.6796875, 2.559326171875, 24.377243041992188, -131.88543701171875, -17.999862670898438, 16.56121826171875, 56.66319274902344, 130.155517578125, 27.592453002929688, 5.658073425292969, -59.257171630859375, -31.89255142211914, -27.25189208984375, 136.5537109375, 112.61284637451172, 87.35533142089844, -198.644287109375, -4.053230285644531, 11.30426025390625, 179.597900390625, -11.718292236328125, 54.31005859375, -25.525405883789062, 3.2441787719726562, -74.1282958984375, 0.0, -5.2304229736328125, 39.97929382324219, -90.78555297851562, 72.6402587890625, 196.1685791015625, -7.038978576660156, 41.024444580078125, 14.708728790283203, 67.3985366821289, 220.83648681640625, 91.45587158203125, 494.9361267089844, 29.05755615234375, 52.76283264160156, -89.67285919189453, 91.194580078125, 34.98017883300781, 165.118408203125, 5.73443603515625, 196.78494262695312, 258.2422180175781, 51.214569091796875, 128.39971923828125, 34.52989959716797, 27.462066650390625, 203.259033203125, 156.720703125, 45.68265151977539, -17.66741943359375, 112.42744445800781, -104.62869262695312, 36.411094665527344, 31.142303466796875, 54.949951171875, 203.71121215820312, 171.8710174560547, -122.14373779296875, 23.198097229003906, 152.77955627441406, 9.96185302734375, 199.3195343017578, 205.19796752929688, 8.122222900390625, -0.88446044921875, 38.74513244628906, 91.12286376953125, 5.216922760009766, 60.9775390625, 68.576416015625, -13.48248291015625, 296.01123046875, -17.974090576171875, 162.2158966064453, -16.677810668945312, 35.50953674316406, -34.79156494140625, 57.464080810546875, 2.0560302734375, 61.823204040527344, 91.57041931152344, -27.682090759277344, -16.504302978515625, 115.79058837890625, -76.37640380859375, -135.68603515625, -116.45745849609375, 0.35711097717285156, 42.43914031982422, -63.33880615234375, 23.84991455078125, 30.701873779296875, 163.08505249023438, 68.071044921875, -19.197402954101562, 170.2362060546875, 173.08506774902344, 64.74163818359375, 198.7828369140625, 105.167236328125, 46.00217056274414, 97.76199340820312, 261.33447265625, 110.9130859375, 60.55096435546875, -121.04269409179688, 102.00982666015625, 78.3611831665039, 248.48846435546875, -2.929412841796875, -73.54129028320312, 108.56912231445312, 154.61175537109375, 50.814544677734375, 84.39706420898438, 117.13397216796875, 77.92807006835938, 221.45970153808594, -13.928573608398438, 317.158447265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000445.npy"}
{"epoch": 0.9319371727748691, "step": 446, "batch_size": 128, "mean": 68.27171325683594, "std": 102.38655090332031, "min": -223.90283203125, "p10": -39.66782836914062, "median": 66.37059783935547, "p90": 197.83255615234373, "max": 421.48828125, "pos_frac": 0.7578125, "sample": [209.08987426757812, 164.08990478515625, 115.15274047851562, -3.67657470703125, -175.18072509765625, 46.389015197753906, -129.5281982421875, 34.65947723388672, 153.57498168945312, 6.956291198730469, -52.990379333496094, -12.192237854003906, 230.70022583007812, -220.39151000976562, 31.31658935546875, 81.1304931640625, 202.18218994140625, 169.45059204101562, 40.4271240234375, 114.432373046875, 111.31280517578125, 10.287017822265625, 25.60694122314453, 200.19189453125, 133.25299072265625, 8.925338745117188, 224.76101684570312, 8.111663818359375, 21.582550048828125, 137.34295654296875, 157.39671325683594, -77.89443969726562, 28.250686645507812, 25.07941436767578, 142.39773559570312, -60.10944366455078, 236.06898498535156, 6.8859100341796875, 54.310546875, 81.24533081054688, -10.740325927734375, 100.57894897460938, 115.0849609375, 74.38552856445312, 218.93878173828125, -47.53345489501953, 138.75494384765625, 187.89501953125, 421.48828125, 170.95904541015625, -20.88592529296875, 188.3411865234375, 81.34721374511719, -8.9127197265625, 208.8587646484375, 196.8214111328125, 53.58861541748047, 15.412017822265625, 55.39154052734375, 192.59844970703125, 111.45735168457031, -21.8292236328125, 219.0118408203125, 10.958847045898438, 65.46580505371094, -126.76824951171875, 30.314498901367188, -8.833267211914062, 115.32894897460938, 182.12310791015625, 119.89714050292969, 60.439788818359375, 8.811714172363281, -38.86955261230469, 203.6205596923828, -2.7938003540039062, 34.18894958496094, 119.6483154296875, -223.90283203125, 40.23027038574219, 151.27182006835938, 106.25482177734375, 87.03057861328125, 121.38514709472656, -156.74002075195312, -105.35073852539062, 2.8326263427734375, 154.9429931640625, 106.31353759765625, 108.42643737792969, -58.32183837890625, 351.5134582519531, 13.85101318359375, 67.275390625, -13.846511840820312, -12.032211303710938, 29.687454223632812, 83.97930908203125, 78.2103271484375, 72.03665161132812, 50.30694580078125, 144.77801513671875, 76.60595703125, 181.26046752929688, -3.426910400390625, -41.53047180175781, 8.372039794921875, 21.256927490234375, 152.61099243164062, -31.856842041015625, 93.71420288085938, 0.0, 215.55532836914062, 138.39891052246094, -8.312591552734375, 0.0, 2.728271484375, -3.804931640625, 22.073814392089844, 0.0, 132.10540771484375, 162.34669494628906, 119.1434326171875, 87.3701171875, 140.80953979492188, 5.391845703125, 98.9935531616211, 73.6968994140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000446.npy"}
{"epoch": 0.9340314136125655, "step": 447, "batch_size": 128, "mean": 74.85385131835938, "std": 90.73916625976562, "min": -145.65350341796875, "p10": -33.383524322509764, "median": 66.84688949584961, "p90": 210.0399383544922, "max": 359.70513916015625, "pos_frac": 0.796875, "sample": [-20.590286254882812, 2.226156234741211, 188.27975463867188, 162.96063232421875, 106.96774291992188, 129.7927703857422, 62.000274658203125, 158.12234497070312, 0.0, 53.10784912109375, 136.916015625, 21.658992767333984, 67.48107147216797, 0.0, 66.21270751953125, 77.8920669555664, -79.96923828125, 158.46871948242188, 127.4560546875, -42.19883728027344, 211.16592407226562, 285.1757507324219, 0.0, 73.46812438964844, 217.90457153320312, -7.2896728515625, -55.98839569091797, 50.905303955078125, 147.39166259765625, 209.557373046875, 359.70513916015625, 60.362064361572266, -8.049484252929688, 27.77829360961914, 248.13714599609375, 68.10492706298828, 118.65483093261719, 136.51890563964844, 39.745582580566406, 251.452880859375, 32.058135986328125, 71.77371215820312, 169.4918212890625, 80.20736694335938, 94.85189819335938, 36.47307586669922, -30.28472900390625, -80.70262145996094, 1.73101806640625, 24.391281127929688, 137.39999389648438, 63.314598083496094, 96.87403869628906, 161.410400390625, 62.2713623046875, 61.98028564453125, -145.65350341796875, 0.0, 11.417579650878906, 185.72927856445312, -35.418663024902344, 68.40524291992188, 0.122833251953125, -32.511322021484375, -44.88916015625, 215.462646484375, 126.40078735351562, 96.43951416015625, 8.112777709960938, 51.515899658203125, -55.641815185546875, 180.96090698242188, 77.83816528320312, 31.39202880859375, 54.70684814453125, 79.81035614013672, -90.55909729003906, 75.22479248046875, 21.49382781982422, 238.75738525390625, -82.20365905761719, 42.037109375, 83.51620483398438, -0.2665061950683594, 41.511199951171875, 129.495849609375, 23.053955078125, -98.64448547363281, 24.0941162109375, 104.08468627929688, 88.33914184570312, 62.349609375, 138.180908203125, 219.2174072265625, 147.74888610839844, 82.67701721191406, 81.71844482421875, 248.3685302734375, 142.2483367919922, 98.07782745361328, 52.93359375, -9.036453247070312, 125.07666015625, 195.21682739257812, -22.18328857421875, 250.60806274414062, 24.75098419189453, 72.69952392578125, 31.26617431640625, 56.279258728027344, -30.66998291015625, 81.85958862304688, -65.44357299804688, 64.29953002929688, 73.22491455078125, 147.94000244140625, 180.14138793945312, 71.9708251953125, 31.223388671875, 23.597763061523438, 48.692779541015625, 25.979415893554688, 215.6171417236328, 54.778411865234375, 141.88189697265625, 263.9370422363281, 70.93637084960938, -85.73477172851562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000447.npy"}
{"epoch": 0.9361256544502617, "step": 448, "batch_size": 128, "mean": 68.65805053710938, "std": 98.42581939697266, "min": -176.2452392578125, "p10": -44.87107543945312, "median": 55.70258331298828, "p90": 199.90689544677733, "max": 365.33673095703125, "pos_frac": 0.75, "sample": [62.495849609375, 90.52574157714844, 28.991348266601562, 37.18572998046875, 161.30374145507812, -3.43389892578125, 141.76840209960938, -78.5145263671875, 52.49468994140625, 138.2025146484375, 39.76519775390625, -2.3002986907958984, 24.175445556640625, 272.7176513671875, 20.97582244873047, 34.14697265625, 184.7926483154297, 209.546630859375, 83.69779968261719, 55.52967834472656, -0.20703125, -69.6125717163086, 35.19366455078125, 140.12789916992188, -42.14109802246094, 221.3222198486328, -77.29346466064453, 219.6220245361328, 72.05735778808594, 52.3546142578125, 13.386383056640625, 25.82733154296875, 118.60745239257812, 191.47259521484375, 32.834014892578125, -125.2069091796875, 103.65552520751953, 311.42608642578125, 49.23779296875, -1.063201904296875, 84.76513671875, 270.9610290527344, 191.49032592773438, -26.184680938720703, 14.69671630859375, 133.0060272216797, 131.75048828125, -11.542755126953125, 251.97329711914062, 257.09783935546875, 229.87591552734375, 178.20504760742188, 96.60169982910156, -37.12386703491211, 150.6743927001953, -8.69976806640625, 55.87548828125, -51.87286376953125, -112.69931030273438, -176.2452392578125, 29.13641357421875, -88.39801025390625, 81.90785217285156, 1.1682472229003906, 53.92018127441406, 146.73721313476562, -44.652099609375, -28.9947509765625, 108.16000366210938, 86.5108642578125, 144.0062255859375, 105.48314666748047, -42.549476623535156, 35.659332275390625, 84.36898040771484, -8.37143325805664, 40.25518798828125, 157.36795043945312, 42.921234130859375, 197.76904296875, 82.82647705078125, 123.61087036132812, 5.191322326660156, 56.29632568359375, 145.195068359375, -45.38201904296875, -110.90785217285156, -64.12997436523438, 139.4412841796875, 11.137588500976562, -35.419891357421875, 64.63838195800781, 136.18580627441406, 4.77606201171875, 2.197389602661133, 173.77029418945312, 61.3983154296875, 53.069854736328125, 18.73175048828125, 47.74169921875, -39.05450439453125, 134.95123291015625, 163.29916381835938, 22.87823486328125, 9.79345703125, -32.274200439453125, -16.537330627441406, 295.39520263671875, 204.8952178955078, 106.89688110351562, 51.62060546875, 5.8513641357421875, 229.05947875976562, 83.95635986328125, 70.61822509765625, 57.98565673828125, 132.50872802734375, -46.07551574707031, 83.90069580078125, 108.85546875, 113.56192016601562, 105.9780502319336, 148.86009216308594, -86.26904296875, 365.33673095703125, -19.674468994140625, 125.62200927734375, -14.757003784179688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000448.npy"}
{"epoch": 0.9382198952879581, "step": 449, "batch_size": 128, "mean": 68.94822692871094, "std": 97.71006774902344, "min": -232.8936004638672, "p10": -57.03404083251953, "median": 78.67573547363281, "p90": 201.19252014160153, "max": 252.41464233398438, "pos_frac": 0.78125, "sample": [79.98049926757812, 13.014541625976562, -37.69786071777344, 94.0080795288086, 207.53594970703125, 104.52407836914062, 1.06640625, 155.8504638671875, 213.7620849609375, 188.54476928710938, 17.08856201171875, -7.337062835693359, 231.406005859375, 18.25860595703125, -88.43453216552734, 9.690673828125, 151.44517517089844, 70.0161361694336, 46.8607177734375, 93.78944396972656, -142.78146362304688, -56.97911834716797, 78.75540161132812, 113.01202392578125, 233.13540649414062, 154.33840942382812, 224.815185546875, -18.544937133789062, -34.116127014160156, 1.4676132202148438, 193.97537231445312, 22.131101608276367, 85.17950439453125, -5.5816650390625, 5.58111572265625, 195.2041015625, 164.39028930664062, 113.92240905761719, 141.844970703125, 96.66844177246094, -62.672760009765625, 121.28256225585938, 176.4925537109375, 88.6328125, -5.030548095703125, -28.649673461914062, 24.350788116455078, 110.45230102539062, 96.98065185546875, 91.40093994140625, -55.02967834472656, 119.42567443847656, -160.18603515625, 194.91160583496094, -15.454330444335938, -232.8936004638672, 176.14385986328125, 2.842926025390625, 198.47390747070312, 23.990142822265625, 143.40914916992188, 50.69490051269531, 37.948944091796875, 209.67120361328125, -24.201759338378906, -10.047737121582031, 15.91943359375, 27.15826416015625, 159.34750366210938, 78.57366943359375, 192.7513427734375, 174.87265014648438, 122.665771484375, 67.07003021240234, -12.159713745117188, 78.5960693359375, 64.54747009277344, 18.543441772460938, 33.62640380859375, -57.25372314453125, 80.55047607421875, 91.20308685302734, -160.66680908203125, 12.408580780029297, -123.32843017578125, 135.7354736328125, -96.88006591796875, 91.27623748779297, 105.99703979492188, -25.611358642578125, 15.192733764648438, 109.759765625, 96.282958984375, -102.68319702148438, 214.87493896484375, 130.87457275390625, 71.45646667480469, 63.21720886230469, -89.52865600585938, 252.41464233398438, 155.288330078125, 230.7523193359375, 39.435943603515625, 17.73240089416504, 14.695728302001953, 117.60430908203125, 219.613037109375, 245.78472900390625, -57.162193298339844, 118.63459777832031, 123.813720703125, 68.90188598632812, 11.649650573730469, 143.43048095703125, 100.78791809082031, 211.0623779296875, 149.2076416015625, 61.2459716796875, 2.2992935180664062, 79.03594970703125, -36.16522216796875, 221.23822021484375, 110.3212890625, 139.1151123046875, 102.84359741210938, 45.17512512207031, -73.64682006835938, 23.14385986328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000449.npy"}
{"epoch": 0.9403141361256544, "step": 450, "batch_size": 128, "mean": 62.1510124206543, "std": 86.11939239501953, "min": -137.6864013671875, "p10": -24.261609268188476, "median": 56.276458740234375, "p90": 195.33500671386716, "max": 313.05462646484375, "pos_frac": 0.75, "sample": [0.0, -13.973663330078125, 119.13775634765625, -45.840484619140625, 55.2366943359375, -9.714874267578125, 62.226470947265625, -32.57501220703125, -16.09001922607422, 135.36993408203125, 70.63165283203125, -17.436283111572266, 200.138427734375, 101.16182708740234, 62.99352264404297, 73.73277282714844, 39.68109130859375, 191.48028564453125, 193.72573852539062, 60.17021560668945, 2.75469970703125, -92.62286376953125, -60.452606201171875, -137.6864013671875, 46.554443359375, 108.64385986328125, -16.02832794189453, 56.79229736328125, 0.454193115234375, -21.32598876953125, 128.24484252929688, -2.9921112060546875, 8.75738525390625, 0.194610595703125, 46.6816291809082, -42.6087646484375, 208.78125, 5.538299560546875, 64.07403564453125, 4.928985595703125, -18.212928771972656, 66.23275756835938, 151.43380737304688, 133.70323181152344, 22.212902069091797, 18.100601196289062, 146.72015380859375, 16.67681884765625, 102.147216796875, 120.37666320800781, 20.497299194335938, 68.77413177490234, 42.21333312988281, 70.98672485351562, 165.2923583984375, 57.541290283203125, 233.89349365234375, 62.8150634765625, 12.153066635131836, 55.7606201171875, -21.185882568359375, 110.33197021484375, 88.93588256835938, 145.73080444335938, 199.0899658203125, 57.8883056640625, 71.27462768554688, 236.1273193359375, 109.85369873046875, 132.23953247070312, 95.80747985839844, 93.11778259277344, 143.08203125, 116.69271850585938, -21.185501098632812, 45.096160888671875, 68.67140197753906, 190.62896728515625, 185.08856201171875, -48.23828125, -16.162994384765625, 217.97332763671875, 35.64128112792969, 49.49334716796875, 164.24801635742188, 74.84915161132812, -129.57754516601562, 127.4393310546875, -40.391868591308594, 4.193695068359375, 79.56423950195312, 19.33661651611328, 0.0, -11.296173095703125, 28.190948486328125, 127.01129150390625, 14.393951416015625, 57.46343994140625, 213.86685180664062, 24.809234619140625, -11.416839599609375, 48.85459899902344, -22.974166870117188, 62.734039306640625, -109.45091247558594, 53.983795166015625, -23.888301849365234, -0.3241119384765625, 96.82290649414062, -121.70217895507812, 200.4979248046875, 11.075225830078125, 138.29473876953125, 201.54782104492188, 17.322021484375, 211.88584899902344, 100.10833740234375, 125.35333251953125, 19.323379516601562, 17.35460662841797, 313.05462646484375, 246.66488647460938, -25.132659912109375, 235.43484497070312, 1.64044189453125, 83.08126831054688, -39.07542419433594, -3.8924560546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000450.npy"}
{"epoch": 0.9424083769633508, "step": 451, "batch_size": 128, "mean": 61.40455627441406, "std": 103.09078979492188, "min": -201.79400634765625, "p10": -68.49771575927734, "median": 56.839088439941406, "p90": 195.70762634277344, "max": 335.66864013671875, "pos_frac": 0.703125, "sample": [88.61846923828125, 155.26531982421875, -40.35517883300781, -15.065223693847656, -201.79400634765625, 158.26754760742188, 29.239654541015625, -74.4815673828125, 94.47439575195312, 30.235198974609375, 242.41073608398438, -69.90753173828125, 270.6493225097656, -18.58819580078125, -0.05859375, 237.358642578125, -4.556419372558594, 4.5746917724609375, -20.48656463623047, -31.47527313232422, -48.86662292480469, 15.82623291015625, -71.08030700683594, 173.89730834960938, 131.0904541015625, -8.52764892578125, 28.35174560546875, 164.71600341796875, 101.67462158203125, 96.52774047851562, 49.245033264160156, 61.1971549987793, 255.68707275390625, 99.63796997070312, 132.88612365722656, 3.3164520263671875, 89.81712341308594, 40.712860107421875, 142.3946533203125, 160.96971130371094, 262.7531433105469, -44.38648986816406, 2.189727783203125, 77.54595947265625, -26.047500610351562, 311.8743591308594, 47.123779296875, 58.945343017578125, 208.61456298828125, 143.8810577392578, -24.078521728515625, -13.629058837890625, 72.2196044921875, 157.635986328125, 199.1417236328125, 275.20184326171875, 194.23587036132812, 16.4317626953125, 35.208282470703125, 185.40573120117188, 128.61859130859375, 13.638092041015625, 14.898857116699219, 275.8375244140625, -121.7637939453125, -31.49462890625, 90.68624877929688, 63.123268127441406, 159.01858520507812, 226.54148864746094, 199.63348388671875, 73.5560302734375, 180.93295288085938, 20.286102294921875, 335.66864013671875, -15.160690307617188, -12.000953674316406, 122.09912109375, -125.99746704101562, 78.36264038085938, -38.17559814453125, 77.0238037109375, 15.873382568359375, -159.78781127929688, -67.89350891113281, 24.288909912109375, -83.45956420898438, 56.20497131347656, 10.071285247802734, -32.45459747314453, 120.18084716796875, 44.287078857421875, 41.28179931640625, -25.926376342773438, -83.50569152832031, 104.18730163574219, 159.99813842773438, 12.085693359375, 128.3804931640625, 68.82183837890625, 64.6103744506836, 186.56112670898438, -100.10124206542969, 12.278396606445312, 72.38607788085938, 139.22784423828125, -90.21806335449219, 57.47320556640625, 64.71566772460938, 159.2506561279297, 26.68194580078125, 155.76162719726562, -16.919509887695312, -50.0091552734375, 51.6929931640625, 62.287109375, 73.36639404296875, 78.96363830566406, 117.56393432617188, -7.861785888671875, -125.05291748046875, 171.62429809570312, -24.6385498046875, -95.20999145507812, 46.20561218261719, 78.90402221679688, -3.8070068359375, 112.01324462890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000451.npy"}
{"epoch": 0.9445026178010472, "step": 452, "batch_size": 128, "mean": 66.10223388671875, "std": 101.47079467773438, "min": -190.22207641601562, "p10": -51.02420425415039, "median": 57.483062744140625, "p90": 206.55709991455078, "max": 385.0069580078125, "pos_frac": 0.7265625, "sample": [18.63818359375, 146.35357666015625, 148.7344970703125, 385.0069580078125, -18.385120391845703, 216.905029296875, 124.55563354492188, -52.696685791015625, 168.29910278320312, 72.48785400390625, -15.53900146484375, 112.45654296875, -56.09375, 51.358978271484375, 206.225830078125, 95.7147216796875, -87.00811767578125, 230.70440673828125, 54.072021484375, 147.59515380859375, 30.610313415527344, 129.38430786132812, -6.868133544921875, -48.0347900390625, -190.22207641601562, 118.777099609375, 219.662841796875, 0.4411773681640625, -65.75202941894531, 65.78875732421875, 142.25164794921875, 0.0, -136.9641876220703, 94.95874786376953, 24.60511016845703, 211.568359375, 328.2193298339844, 176.01922607421875, -40.34544372558594, 111.49121856689453, 7.234794616699219, -68.41542053222656, -24.08056640625, 145.3948211669922, 134.91326904296875, 59.32061767578125, 29.8773193359375, -144.03292846679688, 176.37643432617188, 4.08331298828125, 25.773651123046875, 181.844970703125, 271.4336853027344, 66.19720458984375, 61.539337158203125, 159.16064453125, 234.10076904296875, 72.0250473022461, -29.11608123779297, 93.02423095703125, 119.29568481445312, -19.261363983154297, -66.80087280273438, -80.86381530761719, -66.298828125, -50.30742645263672, 135.70245361328125, 68.22216796875, 112.99152374267578, -4.111785888671875, 25.802001953125, 6.4619140625, 55.313720703125, 83.68603515625, 58.54901123046875, 40.86244201660156, 64.46510314941406, -10.844390869140625, 96.9012451171875, 202.695556640625, 112.4525375366211, 22.1739501953125, 135.07577514648438, 282.15667724609375, 45.157447814941406, 12.701372146606445, -15.74700927734375, 46.92364501953125, 100.34432983398438, 35.4874267578125, 101.14505004882812, 242.5322265625, 29.988494873046875, 67.68270874023438, -103.02227783203125, 9.36846923828125, 44.27227783203125, 22.38623046875, -47.130218505859375, 21.2381591796875, 128.06808471679688, -0.17032241821289062, 111.712158203125, 149.51168823242188, 73.490966796875, -12.6580810546875, 7.768218994140625, 152.97628784179688, 78.21915435791016, 57.96728515625, -15.24725341796875, 47.5911865234375, 207.33006286621094, -125.54495239257812, 56.99884033203125, 136.77520751953125, 177.36843872070312, 290.055419921875, 214.27651977539062, 174.63705444335938, -5.423194885253906, 8.481338500976562, -49.18797302246094, -45.73126220703125, 14.852783203125, 137.19476318359375, -1.268310546875, -18.23725128173828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000452.npy"}
{"epoch": 0.9465968586387434, "step": 453, "batch_size": 128, "mean": 71.29603576660156, "std": 98.28294372558594, "min": -179.26065063476562, "p10": -45.69229431152343, "median": 68.44099044799805, "p90": 209.22254791259766, "max": 420.9324951171875, "pos_frac": 0.75, "sample": [48.60546112060547, 142.37530517578125, -50.99609375, 30.72625732421875, 25.001480102539062, 139.7462615966797, 99.99247741699219, 220.3275146484375, 88.78005981445312, -113.54736328125, 8.63006591796875, 213.12152099609375, 84.6942138671875, -57.032928466796875, 32.2220458984375, 108.90673828125, 118.28883361816406, 20.533424377441406, 124.01461791992188, 88.19039916992188, -60.0369873046875, 32.7060546875, -15.944549560546875, 420.9324951171875, 38.47144317626953, 247.2823486328125, 257.0517272949219, -17.7864990234375, 30.83685302734375, 179.9290771484375, 101.78665161132812, 127.2379150390625, 291.89312744140625, 16.90673828125, 32.88618469238281, -1.5486373901367188, 28.73870849609375, -15.893157958984375, 213.7188720703125, 133.2906494140625, -56.504913330078125, 152.8294219970703, 86.42813110351562, 151.58163452148438, -15.184791564941406, 14.564544677734375, -61.05120849609375, 19.304931640625, 41.85516357421875, 69.94417572021484, 43.02569580078125, 66.93780517578125, 90.60665893554688, 48.693817138671875, -19.957542419433594, 133.83718872070312, 160.77264404296875, -43.6622314453125, 208.59237670898438, -37.893798828125, 151.3499298095703, 238.50173950195312, -23.231414794921875, 34.33123779296875, 161.91976928710938, 151.17999267578125, 91.62551879882812, -40.665008544921875, 53.14320373535156, 233.8802490234375, 152.0699462890625, 34.97137451171875, 98.6971435546875, 60.6473388671875, -50.429107666015625, 281.76092529296875, -8.212738037109375, -32.469207763671875, 210.6929473876953, 36.29020690917969, 91.1544189453125, -89.36859130859375, 32.517059326171875, 128.728515625, 18.828948974609375, 93.72393798828125, -63.951812744140625, -17.9107666015625, 54.790374755859375, 201.58709716796875, 8.16546630859375, 159.76678466796875, 103.58544921875, 7.307792663574219, 95.74583435058594, -1.92584228515625, 132.14630126953125, 78.1097412109375, -22.87713623046875, -29.950759887695312, 19.321800231933594, -123.95965576171875, 94.08462524414062, 42.911376953125, 81.43121337890625, 196.4074249267578, 99.24380493164062, 184.84088134765625, 92.86349487304688, -16.33446502685547, 0.0, 84.7734375, 93.013916015625, 160.06137084960938, 2.355527877807617, 114.48410034179688, 249.3143310546875, -179.26065063476562, 70.07328796386719, 154.3460235595703, -30.512237548828125, -67.71109008789062, 117.93841552734375, 2.715240478515625, 173.87200927734375, -122.1912841796875, 128.46133422851562, 217.3626708984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000453.npy"}
{"epoch": 0.9486910994764398, "step": 454, "batch_size": 128, "mean": 68.63679504394531, "std": 102.72554016113281, "min": -246.55352783203125, "p10": -47.05222396850586, "median": 64.16968536376953, "p90": 198.21475830078123, "max": 327.8787841796875, "pos_frac": 0.78125, "sample": [39.95098876953125, 133.86865234375, -141.1810302734375, -28.454177856445312, 47.87090301513672, 102.26250457763672, 48.58294677734375, 28.005477905273438, -142.54483032226562, 0.26540565490722656, -4.0920257568359375, 4.779930114746094, 144.26380920410156, 26.65228271484375, 97.38232421875, 237.010498046875, 248.15127563476562, 185.96636962890625, 116.39706420898438, 61.5006103515625, 84.35236358642578, 15.287063598632812, 263.53143310546875, 21.933507919311523, -104.22048950195312, 0.06058502197265625, 232.722900390625, -63.751953125, 100.84515380859375, -110.81208801269531, 13.632720947265625, 139.45306396484375, -58.7542724609375, 45.1986083984375, 179.248046875, 105.6168212890625, 134.44281005859375, -235.28216552734375, 35.242431640625, 4.081207275390625, 72.64190673828125, -4.846855163574219, 152.3917236328125, 194.4014129638672, 116.71783447265625, -47.09033966064453, 65.35641479492188, -22.8297119140625, 0.097076416015625, 24.420822143554688, 176.74169921875, 97.82437133789062, -6.910530090332031, 13.548660278320312, 327.8787841796875, 24.45298957824707, 7.333892822265625, 68.6903076171875, 153.6521759033203, 84.9727783203125, 47.59417724609375, 17.9744873046875, -80.67828369140625, 69.19086456298828, 53.52569580078125, 194.97055053710938, 154.42628479003906, 4.825836181640625, 72.54147338867188, 59.946014404296875, 170.9036865234375, 76.25204467773438, 282.86651611328125, 227.81976318359375, 2.59539794921875, 241.4599609375, 45.0440673828125, 150.76918029785156, 264.87451171875, 228.90884399414062, 46.063720703125, -5.32098388671875, 6.470611572265625, 85.2825927734375, 122.71380615234375, 83.5244140625, 281.51409912109375, 71.35194396972656, -47.035888671875, 189.58328247070312, 16.357864379882812, 27.274444580078125, 28.187786102294922, 130.89918518066406, -57.123565673828125, 137.2840576171875, -3.2716331481933594, 115.9722900390625, 160.391845703125, 142.3446044921875, 107.14197540283203, 130.64566040039062, 205.78457641601562, -0.86700439453125, -246.55352783203125, 73.03070068359375, 59.270957946777344, 119.56692504882812, 148.6915740966797, 94.14302062988281, 225.43670654296875, 62.89862060546875, -24.281219482421875, 82.89271545410156, -23.030181884765625, -20.8349609375, -85.01611328125, 170.87149047851562, -25.75098419189453, 119.51498413085938, 3.466339111328125, 175.35330200195312, 135.38153076171875, -2.22723388671875, 69.16464233398438, -21.03076171875, 62.98295593261719, -138.32496643066406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000454.npy"}
{"epoch": 0.9507853403141361, "step": 455, "batch_size": 128, "mean": 52.87847900390625, "std": 100.96431732177734, "min": -238.759521484375, "p10": -47.5994026184082, "median": 37.11634826660156, "p90": 169.42463378906248, "max": 374.43499755859375, "pos_frac": 0.7109375, "sample": [120.4697265625, -48.68590545654297, -6.2018280029296875, 3.341278076171875, -0.8813438415527344, 21.882354736328125, 153.89599609375, -44.27507019042969, 186.63107299804688, 69.8292236328125, 270.3409423828125, 52.085205078125, 109.55937194824219, 27.6395263671875, 6.8487396240234375, 13.57513427734375, -83.376708984375, -25.213788986206055, 86.18585205078125, -23.34600830078125, 163.21734619140625, 374.43499755859375, 36.395050048828125, -81.53240966796875, -7.21673583984375, 8.517997741699219, 18.03936767578125, -172.29519653320312, 31.3397216796875, 78.975830078125, 199.19683837890625, -6.866815567016602, 115.41244506835938, -18.7027587890625, 38.474159240722656, 315.4385681152344, -36.596588134765625, -32.433197021484375, -64.84577941894531, 47.49127197265625, 16.305694580078125, 53.662078857421875, 112.7009048461914, 172.63751220703125, 112.35964965820312, -3.0716476440429688, -52.35505676269531, 3.306640625, 191.53854370117188, 53.30403137207031, -100.23834228515625, 46.04440689086914, 109.0833740234375, 11.4515380859375, 53.0234375, 106.14715576171875, 168.30313110351562, -23.878524780273438, -158.40524291992188, 36.006591796875, -169.57403564453125, 121.89898681640625, 47.06781005859375, 156.95187377929688, 59.55049133300781, -47.133758544921875, 5.129669189453125, 105.28826904296875, 124.08599853515625, -3.1282958984375, -8.499515533447266, 212.8768310546875, 283.0939636230469, 132.5283966064453, 136.08892822265625, 101.8245849609375, 110.53390502929688, -113.84701538085938, 4.1980438232421875, 16.98949432373047, 146.60400390625, 143.1680908203125, 154.27450561523438, -97.7669677734375, 13.737457275390625, -5.760669708251953, 76.70226287841797, -2.6058349609375, -25.432403564453125, 150.09564208984375, -25.814376831054688, 37.837646484375, 137.558837890625, 52.30194091796875, 365.0111083984375, 86.2696533203125, -3.460479736328125, 29.371055603027344, 51.10444259643555, 172.04147338867188, -41.80584716796875, 140.35467529296875, 168.2978973388672, 4.564849853515625, 32.74758529663086, 2.89581298828125, -92.57186889648438, -238.759521484375, 5.4273223876953125, 4.0264129638671875, 23.334152221679688, -39.560340881347656, 0.41131591796875, 162.2410888671875, -38.13055419921875, 176.33657836914062, 53.31414794921875, 129.85256958007812, 54.31414794921875, 42.13232421875, -7.16314697265625, 21.745849609375, 86.43563842773438, 96.51559448242188, 150.2760772705078, 105.1195068359375, 225.43968200683594, 4.8173980712890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000455.npy"}
{"epoch": 0.9528795811518325, "step": 456, "batch_size": 128, "mean": 76.06837463378906, "std": 133.75111389160156, "min": -200.72555541992188, "p10": -61.36756744384765, "median": 66.01351928710938, "p90": 243.90816955566405, "max": 879.8416137695312, "pos_frac": 0.6953125, "sample": [-10.43975830078125, -24.25603485107422, -120.27350616455078, 181.2667694091797, 82.3920669555664, -65.97502136230469, 74.16328430175781, 181.103271484375, 94.6005859375, -106.2176513671875, -54.491943359375, 30.576324462890625, 122.31759643554688, 151.42294311523438, 48.13592529296875, 66.36590576171875, 64.63201904296875, 273.9293212890625, 170.07254028320312, -195.576416015625, 103.43462371826172, 251.3043212890625, 70.2669906616211, 100.80030822753906, 167.29116821289062, 323.6229248046875, -12.196380615234375, 187.6217041015625, -41.21868896484375, 55.754669189453125, 105.07205200195312, -8.37252426147461, 101.44247436523438, -17.802978515625, 69.47810363769531, -107.900146484375, 122.86349487304688, 248.04750061035156, 89.82667541503906, 113.70071411132812, 254.27076721191406, 45.284423828125, -74.84063720703125, 207.8070068359375, 31.18756103515625, 74.04602813720703, -168.67193603515625, 82.88687133789062, 69.79922485351562, 8.267486572265625, 43.48029327392578, 123.8590087890625, 204.30609130859375, 34.264556884765625, 65.6611328125, 7.1033172607421875, 37.090789794921875, 57.444122314453125, 13.715484619140625, 307.2127380371094, 879.8416137695312, 90.833984375, 278.050048828125, 184.83245849609375, 235.9921875, 85.67282104492188, 160.921875, -200.72555541992188, 293.361328125, -3.3411865234375, 224.149169921875, 15.239089965820312, -4.672578811645508, 26.58271026611328, 102.0535888671875, -45.10559844970703, -59.3929443359375, -58.35578918457031, -74.335205078125, 257.2019348144531, -9.813079833984375, 126.33822631835938, 40.07691955566406, 217.1141357421875, 128.593994140625, -38.085906982421875, 226.584228515625, 105.37020111083984, 167.38909912109375, 169.09854125976562, 121.71588134765625, 222.99346923828125, 162.5361328125, 49.631256103515625, -38.91339111328125, -167.92758178710938, 45.56867980957031, -12.49664306640625, -24.1688232421875, 181.50054931640625, 95.88284301757812, 170.79824829101562, 277.00799560546875, -17.666168212890625, 120.46176147460938, 26.728118896484375, 63.69244384765625, -69.85031127929688, -7.319580078125, -12.981319427490234, 15.678298950195312, -109.9665756225586, 245.74432373046875, 91.751953125, -22.527862548828125, 79.4327392578125, 8.037841796875, -31.49786376953125, -45.151519775390625, -8.712547302246094, -50.874053955078125, 137.7618408203125, 0.0, 2.43658447265625, 247.92971801757812, 8.8671875, -94.90299987792969, 243.12124633789062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000456.npy"}
{"epoch": 0.9549738219895288, "step": 457, "batch_size": 128, "mean": 73.30323791503906, "std": 108.32868194580078, "min": -371.0119323730469, "p10": -44.15097427368163, "median": 74.35957336425781, "p90": 198.76414794921874, "max": 358.46466064453125, "pos_frac": 0.796875, "sample": [166.34364318847656, 90.56904602050781, -90.08807373046875, 3.8165435791015625, 20.249237060546875, -16.022735595703125, 64.88481140136719, 160.60748291015625, 160.5826416015625, 82.2791748046875, -58.81982421875, 173.17623901367188, 168.22369384765625, 16.109237670898438, -22.709228515625, 77.20745849609375, 107.48976135253906, 358.46466064453125, -27.96490478515625, 8.695587158203125, -81.28533935546875, 241.48558044433594, 11.8603515625, 148.87034606933594, 51.2762451171875, 86.02716064453125, 75.36138916015625, 68.873046875, 20.1654052734375, 160.06072998046875, 193.78182983398438, 164.489501953125, 107.91061401367188, 154.06996154785156, 178.151611328125, 86.84637451171875, 35.94889450073242, 157.44158935546875, -65.04804229736328, 44.06248474121094, 11.943145751953125, 169.18875122070312, 47.16223907470703, 242.20166015625, 55.58049011230469, -224.58544921875, 94.27555084228516, 33.23262023925781, 163.32958984375, -11.108047485351562, -1.979522705078125, 65.3682861328125, 164.55511474609375, 58.55047607421875, 137.7154541015625, 106.30218505859375, 70.773681640625, -79.65838623046875, 160.81411743164062, 110.105224609375, 69.30696868896484, -15.510696411132812, 6.42120361328125, 245.06243896484375, 137.97421264648438, -27.444549560546875, 162.37754821777344, 8.778388977050781, 85.39340209960938, 180.6697235107422, -5.108100891113281, 24.3519287109375, 216.9378662109375, 86.096435546875, 197.69619750976562, 6.22760009765625, -96.51025390625, 233.95361328125, 69.45272827148438, 25.04852294921875, 185.45816040039062, 104.02621459960938, 258.43670654296875, 47.305633544921875, -222.65325927734375, 119.01007080078125, 87.80203247070312, 145.43505859375, 49.06243896484375, 206.26922607421875, 132.37168884277344, -121.817138671875, 169.77566528320312, 6.255327224731445, 0.0, 199.970947265625, 210.69024658203125, 45.85943603515625, 34.69281005859375, 198.2469482421875, 3.485586166381836, -224.26437377929688, 110.57305908203125, 52.4473876953125, 171.23899841308594, 23.179244995117188, 133.80865478515625, 27.206573486328125, -2.8466796875, -41.590667724609375, 123.17654418945312, -50.125022888183594, -371.0119323730469, 104.95468139648438, 28.48053741455078, 107.83375549316406, 107.27837371826172, 218.3712921142578, 267.5713806152344, 112.88302612304688, 41.64581298828125, -74.55740356445312, -28.28875732421875, 246.8699493408203, -2.8640975952148438, 76.34840393066406, 73.35775756835938, 23.067794799804688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000457.npy"}
{"epoch": 0.9570680628272251, "step": 458, "batch_size": 128, "mean": 66.21015167236328, "std": 104.93473815917969, "min": -207.7281036376953, "p10": -53.72798614501953, "median": 50.7159423828125, "p90": 205.0532531738281, "max": 338.4089660644531, "pos_frac": 0.75, "sample": [12.25732421875, -82.29559326171875, 47.36810302734375, -190.55874633789062, 149.94744873046875, 149.58071899414062, 233.69659423828125, 48.53125, 105.4344482421875, 18.811553955078125, -44.20843505859375, 200.8389892578125, 37.77729034423828, 26.357799530029297, 23.48501968383789, -53.019500732421875, 70.8262939453125, -52.4261474609375, 57.3585205078125, -85.07426452636719, 50.0531005859375, 15.62786865234375, 12.809839248657227, 149.22915649414062, 250.42926025390625, -63.551177978515625, 44.301605224609375, 85.12925720214844, 113.10191345214844, 165.92431640625, 290.861083984375, 173.83966064453125, 192.12571716308594, 27.287796020507812, -37.657257080078125, 182.44247436523438, 52.7459716796875, 84.37002563476562, 163.45437622070312, -40.57106018066406, 186.71356201171875, 87.18026733398438, 92.623291015625, -32.24284362792969, -207.7281036376953, 173.42864990234375, 188.7024688720703, -61.905426025390625, -12.321044921875, 225.92156982421875, 142.98150634765625, 100.18534851074219, 45.557861328125, 13.72259521484375, 36.98333740234375, 190.38510131835938, 235.86099243164062, 30.910568237304688, 294.6478271484375, 33.65264892578125, 106.18292236328125, 72.87265014648438, -181.04058837890625, -69.196533203125, 165.80526733398438, -100.81039428710938, 51.3787841796875, -37.737762451171875, 235.55404663085938, -7.945409774780273, -75.15963745117188, -8.579437255859375, 79.4049072265625, -4.246667861938477, 164.41241455078125, 294.94122314453125, 5.0452880859375, -45.66168975830078, 59.39911651611328, 217.87277221679688, 48.32794189453125, 79.6478271484375, 41.48992919921875, 168.20611572265625, 63.209442138671875, -55.38111877441406, 76.45439147949219, 141.87930297851562, 53.302520751953125, 101.37322998046875, 25.8642578125, 222.75408935546875, 141.95909118652344, 194.5924072265625, -5.6242828369140625, 7.19580078125, -149.0645751953125, 1.2902050018310547, -0.4779777526855469, 165.88088989257812, 27.67486572265625, 55.95684814453125, -6.45794677734375, 28.19094467163086, 52.839752197265625, 119.4283447265625, 115.84564208984375, -44.49816131591797, -11.695556640625, 8.120731353759766, 135.66815185546875, 45.837005615234375, 40.775146484375, -65.18843078613281, -37.12196350097656, 139.31175231933594, 261.9827575683594, 194.7913818359375, 64.87423706054688, 338.4089660644531, 6.8407135009765625, 107.96717071533203, 214.88653564453125, 15.136543273925781, 5.253746032714844, 62.82794189453125, 34.748992919921875, -34.78076171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000458.npy"}
{"epoch": 0.9591623036649215, "step": 459, "batch_size": 128, "mean": 58.1756591796875, "std": 101.66893768310547, "min": -186.26226806640625, "p10": -62.980239868164055, "median": 44.48912048339844, "p90": 204.25616302490232, "max": 299.4432067871094, "pos_frac": 0.671875, "sample": [86.68914794921875, -13.227706909179688, 31.230018615722656, 122.42245483398438, 23.320343017578125, -76.56597900390625, 51.1229248046875, 20.195953369140625, 208.25625610351562, 215.3175048828125, -26.490318298339844, 19.46697998046875, -78.98683166503906, 147.39471435546875, -19.044158935546875, 29.048404693603516, 236.49252319335938, 201.697021484375, 186.45669555664062, 24.618057250976562, -25.246902465820312, 140.35479736328125, 290.65191650390625, 41.065208435058594, -163.45025634765625, -22.4898681640625, 86.453857421875, -88.57804870605469, 299.4432067871094, -72.78335571289062, 202.98622131347656, 129.23019409179688, 20.20477294921875, 61.981292724609375, -125.79438781738281, 99.3050537109375, 178.41473388671875, 6.386968612670898, -28.123870849609375, 59.65313720703125, 46.97442626953125, 84.26910400390625, -13.928237915039062, 102.95645141601562, -1.5825386047363281, 217.76846313476562, 63.950286865234375, 17.55767059326172, -20.264129638671875, -21.428436279296875, -37.18475341796875, 55.3125, -1.575408935546875, 90.53009033203125, 47.87164306640625, -22.240188598632812, 188.2894287109375, 115.96173095703125, 0.7127685546875, 98.08273315429688, -20.795166015625, 26.922607421875, 132.49815368652344, 70.9149169921875, 144.96072387695312, 236.00259399414062, 184.01126098632812, -46.291404724121094, 212.44053649902344, -72.24038696289062, -23.8502197265625, 110.582763671875, 16.99505615234375, -70.7252197265625, 97.14361572265625, 20.377723693847656, 270.26409912109375, -117.25018310546875, 186.21849060058594, 236.29400634765625, 154.1491241455078, 167.69393920898438, -50.04808044433594, -186.26226806640625, -15.021713256835938, -10.375106811523438, 142.58078002929688, 42.003814697265625, 0.0, 32.398292541503906, 119.56074523925781, -15.4278564453125, 18.5926513671875, 266.2066345214844, 213.6795654296875, -9.386672973632812, 5.936670303344727, -47.188079833984375, 68.20478057861328, -23.733299255371094, 89.90371704101562, 78.20684814453125, -102.24114990234375, 138.2488555908203, 145.63937377929688, 40.437652587890625, -18.470571517944336, 0.0, 94.8450927734375, -44.42230224609375, 207.2193603515625, 161.81536865234375, 148.58517456054688, 148.35626220703125, 13.92681884765625, -61.352386474609375, 34.44349670410156, 27.480911254882812, -180.76437377929688, -66.778564453125, 64.01028442382812, 127.34095764160156, -60.92529296875, 112.82687377929688, 61.339874267578125, 70.58892822265625, 120.12173461914062, 136.95179748535156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000459.npy"}
{"epoch": 0.9612565445026178, "step": 460, "batch_size": 128, "mean": 57.792110443115234, "std": 94.97825622558594, "min": -193.14857482910156, "p10": -51.016314697265614, "median": 50.49871826171875, "p90": 179.12557830810547, "max": 307.1453552246094, "pos_frac": 0.734375, "sample": [28.83587646484375, 80.95513916015625, -73.85409545898438, 74.62176513671875, 8.420440673828125, 241.358642578125, 172.71893310546875, -193.14857482910156, 22.75341796875, 16.78802490234375, 169.9108123779297, 178.89151000976562, 149.13046264648438, 19.709732055664062, 172.67926025390625, -25.247913360595703, 32.04420471191406, -11.95794677734375, -11.693355560302734, 63.608489990234375, 196.8358917236328, -188.50067138671875, 163.56503295898438, -70.32476806640625, 112.5511474609375, 53.609832763671875, -0.715087890625, 248.17727661132812, 56.787872314453125, 127.10955810546875, -122.57757568359375, 6.916927337646484, 140.3428955078125, 180.1569366455078, 58.889678955078125, 34.65217971801758, 148.37489318847656, -68.8424072265625, 231.4486083984375, -4.98028564453125, -48.4984130859375, 31.0633544921875, 58.67242431640625, 70.50698852539062, 21.08913230895996, -76.46220397949219, -45.768192291259766, 1.373321533203125, 31.34619140625, 105.09458923339844, 132.17327880859375, 17.72637939453125, 123.41795349121094, -100.40533447265625, 127.6011962890625, -13.257980346679688, 152.67529296875, 160.1479949951172, 165.26052856445312, 98.75369262695312, 20.032569885253906, 179.67173767089844, 11.04583740234375, 307.1453552246094, -93.27276611328125, 193.73687744140625, -4.694305419921875, 112.03829956054688, -191.0999298095703, 123.07260131835938, -70.407470703125, 80.81787109375, 118.53990173339844, 17.158432006835938, 102.92477416992188, -14.023773193359375, 7.870880126953125, -0.13873291015625, -141.86378479003906, 62.003326416015625, 21.332855224609375, 42.3330078125, 188.04275512695312, -4.560781478881836, 34.824851989746094, -31.4879150390625, -16.050064086914062, -39.170928955078125, 19.897628784179688, 170.66244506835938, 155.66966247558594, 89.92996215820312, 76.443603515625, 45.66852569580078, 125.50515747070312, -20.26068687438965, 2.76531982421875, -27.488693237304688, 63.18559265136719, -56.89141845703125, 72.79302978515625, 153.63311767578125, -26.04498291015625, 246.0320587158203, 245.13031005859375, 10.10003662109375, 82.5894775390625, 84.45889282226562, 186.164306640625, 87.49052429199219, 193.74716186523438, 8.613365173339844, 41.305694580078125, 158.30215454101562, 108.011962890625, -33.48258590698242, 149.28619384765625, 40.798980712890625, 53.49072265625, 75.90640258789062, -15.240966796875, 135.31326293945312, 32.65141296386719, 77.36117553710938, -18.278305053710938, 82.97611236572266, 47.5067138671875, 23.352645874023438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000460.npy"}
{"epoch": 0.9633507853403142, "step": 461, "batch_size": 128, "mean": 62.60493087768555, "std": 105.5164566040039, "min": -179.5689697265625, "p10": -65.69248886108397, "median": 53.79491424560547, "p90": 190.97755126953123, "max": 415.39324951171875, "pos_frac": 0.6796875, "sample": [68.44110107421875, -70.70462036132812, 200.77664184570312, 20.000476837158203, 115.4312744140625, 163.1051483154297, 55.95794677734375, 149.6096954345703, 121.98303985595703, 50.979705810546875, 149.25051879882812, 41.69844055175781, 167.16024780273438, 138.5087890625, 144.6229248046875, -21.271949768066406, 189.79104614257812, 73.89056396484375, -157.61911010742188, 116.87326049804688, -17.527389526367188, 113.46853637695312, 130.5264892578125, -69.48377990722656, 159.9166259765625, 23.538909912109375, 89.50015258789062, 115.15875244140625, -4.541357040405273, 95.90576934814453, 9.858673095703125, 88.93241882324219, -16.917892456054688, 155.62908935546875, 159.67442321777344, 61.612548828125, 181.04620361328125, 170.7599639892578, -179.5689697265625, -71.82977294921875, 147.59478759765625, 171.90728759765625, 86.88827514648438, -61.220794677734375, 55.27488708496094, -16.525848388671875, -0.14695358276367188, -32.159698486328125, 194.55575561523438, 16.26434326171875, -26.24932098388672, 193.74606323242188, 134.72462463378906, -5.318946838378906, 214.2899627685547, 15.9215087890625, 147.7884521484375, 24.0948486328125, -15.836166381835938, -55.0675048828125, 40.73723602294922, 134.8755645751953, -151.33175659179688, -18.465362548828125, -22.35140037536621, 246.16461181640625, 122.39422607421875, 0.0, 251.38543701171875, -125.85409545898438, -13.850486755371094, 86.39724731445312, 146.43096923828125, -74.39144897460938, 52.31494140625, 415.39324951171875, 82.68145751953125, -145.12109375, -71.63619995117188, 31.988739013671875, -37.845130920410156, 178.079345703125, 10.10174560546875, 155.87548828125, -100.74832153320312, -35.081512451171875, 83.40690612792969, 185.53091430664062, -32.22435760498047, -0.8278961181640625, -42.05902099609375, 4.751472473144531, 42.977874755859375, 70.5455322265625, -114.32769775390625, 261.50909423828125, 10.002548217773438, 349.87493896484375, 69.87692260742188, 119.00215148925781, 125.1448974609375, 43.68855285644531, 21.831192016601562, 117.59033203125, -45.99302673339844, 209.88143920898438, -12.84539794921875, 179.6106719970703, 108.454345703125, 10.102668762207031, -26.367904663085938, 115.22381591796875, 239.134521484375, -43.060272216796875, 44.119964599609375, -98.17767333984375, 121.9052505493164, 14.814193725585938, -47.49678039550781, 44.65019226074219, 7.52581787109375, 149.02987670898438, 17.09619140625, 206.2705078125, -64.0676498413086, -0.105194091796875, 89.98788452148438, 214.62977600097656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000461.npy"}
{"epoch": 0.9654450261780104, "step": 462, "batch_size": 128, "mean": 75.25979614257812, "std": 110.34843444824219, "min": -177.83251953125, "p10": -62.80628280639648, "median": 73.02622985839844, "p90": 215.8211975097656, "max": 418.6090087890625, "pos_frac": 0.7421875, "sample": [235.19955444335938, 159.33596801757812, 179.25665283203125, 46.2041015625, 93.46142578125, 116.95068359375, -13.5443115234375, 156.9017791748047, 163.26968383789062, 145.646484375, 81.47735595703125, 13.65826416015625, 169.16949462890625, 25.447845458984375, 232.7321014404297, 28.528610229492188, -2.365264892578125, 37.648216247558594, -97.87760925292969, 69.8453369140625, -92.15038299560547, 47.97308349609375, 80.20476531982422, 297.26995849609375, 37.75642395019531, -58.13751220703125, -49.9151611328125, 20.082321166992188, 66.3980712890625, 146.157958984375, -130.46002197265625, 169.95899963378906, 147.14376831054688, 60.057098388671875, 211.82066345214844, 139.422607421875, 225.15577697753906, -11.372634887695312, -128.53091430664062, 28.34332275390625, 53.24951171875, 62.94560241699219, 125.6820068359375, 199.76458740234375, 105.50042724609375, -53.27619171142578, 26.212234497070312, -13.159454345703125, 285.7501220703125, -15.565689086914062, 86.24172973632812, 233.6344757080078, 25.930707931518555, 82.68624877929688, 294.27130126953125, 89.950439453125, -46.94110107421875, 39.78163146972656, 159.299560546875, 228.9510498046875, -124.92788696289062, 138.36041259765625, 142.11961364746094, -61.729217529296875, 98.98863220214844, 71.75332641601562, -136.9928741455078, 150.16015625, 157.87542724609375, 111.86087036132812, 161.8245849609375, -27.382484436035156, -137.97564697265625, -2.52423095703125, 102.33114624023438, -12.292865753173828, 190.04415893554688, -16.641357421875, -3.2453079223632812, 18.39990234375, 111.52108764648438, 116.95819091796875, 160.44049072265625, 52.55909729003906, -5.9019775390625, 64.83819580078125, 88.04095458984375, 32.945404052734375, -101.55485534667969, 1.5591583251953125, -24.72997283935547, 232.13706970214844, 170.74276733398438, 60.70818328857422, -73.90328979492188, -53.221702575683594, 317.34844970703125, 138.1024169921875, 194.35675048828125, -13.992202758789062, 258.1014404296875, 22.923099517822266, 107.33401489257812, 187.05848693847656, 141.64892578125, -65.3194351196289, -130.63040161132812, 131.301025390625, 163.83987426757812, 211.525390625, 3.49658203125, 310.99591064453125, 20.93798828125, 36.8583984375, -19.798608779907227, -85.8721923828125, 100.60354614257812, 6.50927734375, 104.5830078125, 1.929656982421875, 60.944618225097656, 74.29913330078125, 104.08685302734375, 418.6090087890625, 77.89816284179688, 173.4173583984375, 85.81149291992188, -177.83251953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000462.npy"}
{"epoch": 0.9675392670157068, "step": 463, "batch_size": 128, "mean": 58.423309326171875, "std": 101.77363586425781, "min": -291.82080078125, "p10": -40.08180694580078, "median": 41.94873809814453, "p90": 198.53251953125, "max": 343.17242431640625, "pos_frac": 0.734375, "sample": [77.29315185546875, -71.74118041992188, 83.32870483398438, -79.01930236816406, 28.491348266601562, 18.66790771484375, -10.4686279296875, 247.2552490234375, -8.300300598144531, 134.1446075439453, 13.152252197265625, 4.3915252685546875, 48.419189453125, -17.364816665649414, 8.878608703613281, 149.07667541503906, 17.610183715820312, 42.59257507324219, 104.69195556640625, 92.73922729492188, 16.854366302490234, 197.33856201171875, 45.667388916015625, -79.62394714355469, 195.89111328125, 87.44464111328125, 151.5081787109375, 270.8252868652344, 2.52154541015625, 126.25115966796875, -13.532806396484375, 94.6507568359375, 4.011688232421875, -5.559030532836914, 176.1197509765625, -29.3477783203125, 176.2059326171875, 234.79331970214844, 324.22802734375, 56.53573989868164, 13.229034423828125, -184.61862182617188, -52.55108642578125, 33.25661849975586, 230.05667114257812, 139.71389770507812, -25.976669311523438, 70.85714721679688, 87.93106079101562, 69.0220947265625, 193.21487426757812, -57.700897216796875, 32.231903076171875, -21.358558654785156, -20.099334716796875, 6.672636032104492, 343.17242431640625, 22.514734268188477, 39.611019134521484, 215.3224639892578, 220.32632446289062, 76.76657104492188, 149.4250946044922, 114.0523681640625, -210.02496337890625, -177.38275146484375, 105.63352966308594, 11.870269775390625, 102.67926025390625, 35.66595458984375, 77.51419067382812, 201.31842041015625, -4.281515121459961, 12.954948425292969, -41.076873779296875, 101.02163696289062, 175.459716796875, 106.68429565429688, 141.25363159179688, 97.74440002441406, 77.08177185058594, 137.45211791992188, 42.756683349609375, -15.3873291015625, 30.325637817382812, 39.61116027832031, 1.153900146484375, 124.47068786621094, 231.09945678710938, -21.39974594116211, -4.288726806640625, 103.89892578125, 190.11376953125, 41.304901123046875, 49.69645690917969, 117.10113525390625, 12.160552978515625, -53.312034606933594, 66.12274169921875, 98.7139892578125, 33.19982147216797, -13.11083984375, 256.31268310546875, 4.121192932128906, 236.7079620361328, 119.7728271484375, -19.8568115234375, 134.9019012451172, 55.602020263671875, 202.3736572265625, 22.1239013671875, -36.662200927734375, 1.235443115234375, 56.19770812988281, -45.86333465576172, 60.63865661621094, 105.496337890625, -17.90079116821289, -82.63247680664062, 10.853424072265625, -39.65534973144531, 0.0, -22.5400390625, 20.368221282958984, 13.780517578125, 122.24810791015625, -291.82080078125, -25.112823486328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000463.npy"}
{"epoch": 0.9696335078534032, "step": 464, "batch_size": 128, "mean": 59.238311767578125, "std": 100.62702178955078, "min": -205.58599853515625, "p10": -58.423454284667955, "median": 47.00802230834961, "p90": 184.5606475830078, "max": 319.8241271972656, "pos_frac": 0.6640625, "sample": [0.0, 107.29960632324219, -0.7459640502929688, 203.8527069091797, -9.732421875, 160.7119903564453, 166.0255126953125, 230.6058349609375, 147.21929931640625, 44.518096923828125, 154.92861938476562, -54.757843017578125, -11.907470703125, 104.29338073730469, 72.26004028320312, 37.594146728515625, 260.1672668457031, -22.134521484375, 26.663925170898438, 20.0714111328125, 7.0462493896484375, -26.537353515625, -6.894775390625, -15.370628356933594, 169.0283660888672, 221.3347930908203, 147.66033935546875, 189.22207641601562, -9.008392333984375, 97.85772705078125, 162.7197265625, 118.51151275634766, -82.92232513427734, 172.9139862060547, 65.05416870117188, -85.48760986328125, 26.945877075195312, 46.32512664794922, -28.60723876953125, -73.2059097290039, 235.831787109375, 157.49288940429688, 55.966407775878906, -74.89483642578125, -26.85407257080078, 183.38729858398438, -23.759410858154297, 61.78863525390625, 104.22998046875, 121.2535400390625, -52.2186279296875, -205.58599853515625, 65.30671691894531, 288.4610900878906, 73.24076080322266, 201.94357299804688, -91.6986083984375, 78.24560546875, -2.614177703857422, -49.348602294921875, 166.145751953125, 171.3154296875, 169.31187438964844, -153.7962646484375, 85.43460083007812, -7.03089714050293, 47.69091796875, 10.126373291015625, 12.29083251953125, 29.13677978515625, 115.29763793945312, 319.8241271972656, 76.02993774414062, 111.14370727539062, 5.413719177246094, -78.21536254882812, 135.48672485351562, 102.47793579101562, -126.78788757324219, 187.2984619140625, 38.64585876464844, -3.2050933837890625, 87.72335815429688, -10.700088500976562, 217.484619140625, -25.48699951171875, 163.9996337890625, 91.6793212890625, 24.4407958984375, 8.708274841308594, 33.242156982421875, -158.42648315429688, 182.79928588867188, 29.1678466796875, 56.982967376708984, 129.20407104492188, 170.4937744140625, 54.8492431640625, -106.80412292480469, 11.371204376220703, 171.33901977539062, -1.4169769287109375, -28.240234375, -40.1905517578125, 225.89434814453125, 12.0294189453125, 8.61431884765625, 45.59326171875, -44.49444580078125, 176.76904296875, -11.074234008789062, 110.16836547851562, -24.373199462890625, -17.825164794921875, 240.67474365234375, 70.8310317993164, 179.98570251464844, -66.97654724121094, 166.36669921875, -8.236820220947266, 98.00311279296875, -33.84442138671875, -33.854736328125, 111.7144775390625, 27.05877685546875, -80.80133056640625, 54.564727783203125, 65.76840209960938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000464.npy"}
{"epoch": 0.9717277486910995, "step": 465, "batch_size": 128, "mean": 75.71699523925781, "std": 104.46614074707031, "min": -163.59817504882812, "p10": -42.64092407226562, "median": 80.39204406738281, "p90": 213.04990234375, "max": 364.66949462890625, "pos_frac": 0.7421875, "sample": [-21.687454223632812, -10.219329833984375, 110.95716857910156, 32.61895751953125, 49.743316650390625, 102.8475341796875, 89.45602416992188, -94.75155639648438, 97.27987670898438, 234.83987426757812, 166.15475463867188, 290.6680908203125, -40.19805908203125, 20.024826049804688, 159.54920959472656, 121.9088134765625, 302.5194091796875, 90.9815673828125, 176.95184326171875, 186.39266967773438, 314.0217590332031, 208.8310546875, 164.650390625, 74.14301300048828, -95.40304565429688, 2.276580810546875, 10.35986328125, 156.8822021484375, 234.716064453125, 167.20803833007812, 144.6004638671875, 65.8924560546875, 194.03759765625, -14.335638046264648, 0.74163818359375, 236.77447509765625, -45.75396728515625, 212.7215576171875, 92.73469543457031, 261.7605285644531, 114.69989013671875, -110.76220703125, 30.487152099609375, 175.14016723632812, 118.79933166503906, 135.2601776123047, 104.99357604980469, -107.23888397216797, 108.39395141601562, 85.2995376586914, 68.87064361572266, 225.22003173828125, 72.015869140625, 191.69467163085938, 181.7989501953125, 148.5146484375, 13.692962646484375, 213.8160400390625, 174.79879760742188, -11.415283203125, -107.19757080078125, 196.0775909423828, 40.594696044921875, -15.10833740234375, 80.1751708984375, 15.121551513671875, 21.53741455078125, 41.530029296875, 33.722198486328125, 118.49868774414062, -20.46441650390625, -67.73046875, 41.62957763671875, 67.18218994140625, 15.594451904296875, 197.29092407226562, 207.59396362304688, 80.60891723632812, -23.550506591796875, -69.93582153320312, 72.65635681152344, 28.581634521484375, 95.46306610107422, -129.56353759765625, 20.783370971679688, 152.32366943359375, -24.7366943359375, 83.65509033203125, 47.31951904296875, 9.769500732421875, 364.66949462890625, 233.78683471679688, -21.54613494873047, 106.333251953125, -5.3289794921875, 0.561981201171875, -29.674522399902344, -7.3394775390625, -91.34344482421875, -38.94110107421875, 25.257736206054688, 94.26760864257812, -17.064956665039062, -36.18962097167969, 133.71514892578125, 115.14521789550781, 44.8990478515625, 103.30398559570312, 120.26748657226562, 128.8634033203125, 97.12969970703125, 150.06692504882812, -4.36700439453125, 173.1749267578125, 3.547271728515625, 82.57101440429688, -141.625732421875, 32.43365478515625, -59.389923095703125, -13.384536743164062, 227.6849365234375, 90.63423156738281, -41.3067626953125, 102.53213500976562, 239.88858032226562, -163.59817504882812, -17.39617919921875, 113.13787841796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000465.npy"}
{"epoch": 0.9738219895287958, "step": 466, "batch_size": 128, "mean": 78.388427734375, "std": 90.90984344482422, "min": -221.41665649414062, "p10": -17.69653167724609, "median": 62.12086486816406, "p90": 204.94776916503906, "max": 363.20086669921875, "pos_frac": 0.8125, "sample": [112.52767944335938, 105.4711685180664, 108.35417175292969, 219.7783203125, -7.400230407714844, 144.845458984375, 137.68963623046875, -9.704620361328125, -56.55814743041992, -16.613357543945312, 154.79425048828125, -46.859596252441406, 1.5148773193359375, 82.62515258789062, 62.419219970703125, 136.724853515625, 50.988494873046875, 42.70607376098633, 101.744384765625, 117.11631774902344, 56.382049560546875, 279.7689208984375, -116.17233276367188, -221.41665649414062, 23.044456481933594, 11.2255859375, 363.20086669921875, -7.12432861328125, -6.4832611083984375, 34.966888427734375, 153.5070037841797, 77.78350830078125, 19.0980224609375, 261.9717712402344, 51.67205810546875, 0.0, 118.01651000976562, 55.128387451171875, 187.5012969970703, 79.56207275390625, 58.99150085449219, 78.2259521484375, 21.897979736328125, 54.921783447265625, -3.05615234375, 12.343940734863281, 152.53204345703125, 174.13180541992188, 246.9737091064453, 14.462936401367188, 0.0, 126.96026611328125, 64.9767837524414, 1.854888916015625, -4.3218841552734375, 251.05133056640625, 34.13177490234375, -84.50920104980469, 39.73480224609375, 54.34486389160156, 61.22869873046875, 58.78059387207031, 15.61328125, 57.8140869140625, 16.317596435546875, 78.33174133300781, 95.10359191894531, -26.44037628173828, 250.00135803222656, 4.017059326171875, 183.80078125, 77.49139404296875, -7.660400390625, 60.76141357421875, 69.4891128540039, 18.100677490234375, 57.55027770996094, -81.31842041015625, 49.70343017578125, 149.813720703125, 161.0999755859375, 204.431640625, -33.76660919189453, 98.50132751464844, 32.044517517089844, 0.0, 136.69061279296875, 26.279388427734375, 68.608642578125, 167.085205078125, 126.39805603027344, -34.68479919433594, 32.984588623046875, 33.17760467529297, 250.66754150390625, 93.49777221679688, 102.43585205078125, 61.822509765625, 105.096923828125, 205.61224365234375, 124.2635498046875, 228.5975341796875, 75.87574768066406, 183.51763916015625, 182.18357849121094, 31.394271850585938, 5.144744873046875, 143.54843139648438, 94.11318969726562, 199.27708435058594, 83.96731567382812, 144.88455200195312, 125.5338134765625, 18.48126220703125, 68.58261108398438, 204.66299438476562, -35.12788391113281, 145.02940368652344, 171.34442138671875, 216.7896728515625, 46.09257507324219, -54.7119140625, 229.3289794921875, 23.83654022216797, -20.22393798828125, -44.690208435058594, 42.78607177734375, 213.30714416503906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000466.npy"}
{"epoch": 0.9759162303664921, "step": 467, "batch_size": 128, "mean": 58.257843017578125, "std": 110.60601806640625, "min": -172.52044677734375, "p10": -72.53600616455077, "median": 45.391929626464844, "p90": 196.6661178588867, "max": 464.12457275390625, "pos_frac": 0.7109375, "sample": [234.93853759765625, 104.25997924804688, 79.73773193359375, 87.42697143554688, 198.27276611328125, 464.12457275390625, 238.14942932128906, 144.82569885253906, 34.22802734375, 180.38226318359375, -29.97625732421875, 144.69276428222656, 182.6310577392578, 45.97053527832031, 29.181060791015625, -118.77790832519531, 157.86672973632812, 32.83837890625, 91.3822250366211, 9.992910385131836, -27.22966766357422, 77.9830322265625, 6.135955810546875, -61.085693359375, 0.0, 55.00749206542969, 131.90322875976562, 21.755859375, -38.061492919921875, 135.9659423828125, 17.68638038635254, -59.95062255859375, -14.027946472167969, -6.774391174316406, 37.81884765625, 8.285049438476562, 2.520965576171875, 186.44610595703125, 62.41886901855469, -142.7913818359375, -22.143783569335938, 99.674560546875, -67.50688171386719, 68.35475158691406, -45.41496276855469, 82.15984344482422, -116.20147705078125, 106.66357421875, 52.73931884765625, 3.221658706665039, 41.47821044921875, -161.02401733398438, -84.2706298828125, 117.79521179199219, 193.33921813964844, 8.45916748046875, 96.63177490234375, 77.28340148925781, 195.97755432128906, -121.92770385742188, -144.04481506347656, 152.16192626953125, 303.59515380859375, 129.9065399169922, 152.01007080078125, 81.3564453125, -120.88580322265625, -51.51971435546875, 178.873046875, 23.322906494140625, -6.285722732543945, 131.79183959960938, 219.85525512695312, 24.76507568359375, 37.96315002441406, 108.81269073486328, -26.601762771606445, -88.91876220703125, 183.0139923095703, 246.442626953125, 93.51364135742188, 0.0, 4.0032958984375, 28.594924926757812, -19.72174072265625, 45.13249206542969, -164.81475830078125, -172.52044677734375, 205.53326416015625, 75.36756896972656, -148.89529418945312, 244.04461669921875, 183.7321014404297, 42.072410583496094, 156.73863220214844, 182.28074645996094, 120.9737548828125, -14.72344970703125, 23.68841552734375, 72.87100219726562, 121.12744140625, 191.5911865234375, -16.114511489868164, -54.57568359375, 123.69451904296875, -132.13824462890625, 45.6513671875, 64.9520034790039, -61.7705078125, -45.835205078125, -0.30889129638671875, 0.85772705078125, 32.870513916015625, 207.2301483154297, 3.67095947265625, -13.759063720703125, -19.699783325195312, 229.7745361328125, 84.0504150390625, 238.4456787109375, 94.490966796875, 41.47962951660156, 109.57281494140625, 53.04165267944336, 76.00473022460938, 314.56256103515625, 0.866912841796875, 42.341796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000467.npy"}
{"epoch": 0.9780104712041885, "step": 468, "batch_size": 128, "mean": 58.979515075683594, "std": 101.11334991455078, "min": -387.4185791015625, "p10": -41.935933303833, "median": 39.806610107421875, "p90": 205.89327087402341, "max": 331.3763427734375, "pos_frac": 0.703125, "sample": [109.98736572265625, 212.1767578125, 83.80087280273438, -36.719688415527344, 202.50250244140625, -5.350286483764648, 139.1295166015625, -23.073867797851562, 26.62982177734375, 204.81460571289062, 15.1976318359375, -17.98358154296875, 176.9292755126953, 6.009788513183594, 54.106903076171875, -34.88001251220703, 247.98849487304688, -6.285003662109375, 4.732875823974609, 72.66993713378906, 117.97776794433594, 29.316482543945312, 13.65716552734375, -7.63470458984375, -0.46050262451171875, 243.648193359375, 94.83609771728516, 211.4005126953125, 140.68121337890625, 111.02926635742188, -28.039703369140625, 33.98333740234375, 116.25022888183594, 7.383697509765625, -17.731857299804688, 210.8639373779297, -7.0088043212890625, 208.41015625, -387.4185791015625, 27.3323974609375, 106.60110473632812, 101.27874755859375, -6.01141357421875, 54.51177978515625, -48.27166748046875, 19.0736083984375, 35.85511779785156, 39.76446533203125, -45.620452880859375, 71.54232788085938, 195.6527099609375, 33.7406005859375, 155.36744689941406, 216.05780029296875, 101.17325592041016, 18.5172119140625, 62.11956787109375, 122.01982879638672, 110.6295166015625, -29.133989334106445, 168.10702514648438, -33.48711395263672, -63.173004150390625, -19.775550842285156, 28.977678298950195, 30.010940551757812, 43.20977783203125, -86.4996337890625, 91.36094665527344, 57.99554443359375, -42.700439453125, 103.16445922851562, 40.01153564453125, 251.08929443359375, 85.26177978515625, 160.0986785888672, 25.50555419921875, 136.6439971923828, 24.07671356201172, 126.75782775878906, -38.284332275390625, 141.3822021484375, 168.8488006591797, 39.8487548828125, -166.97357177734375, -55.735931396484375, -70.05648803710938, 83.30535888671875, -41.6082878112793, 65.23573303222656, 90.03900146484375, 19.294315338134766, 111.83589172363281, 130.14691162109375, 114.9267578125, 130.18182373046875, 35.804046630859375, 116.64334106445312, 6.908935546875, -28.777931213378906, 12.09918212890625, -35.563743591308594, 150.96524047851562, 43.855804443359375, 46.1849365234375, -13.936206817626953, 10.66400146484375, -17.496185302734375, 331.3763427734375, -49.001708984375, -11.822654724121094, 4.8859405517578125, 71.01611328125, -29.43316650390625, -78.93740844726562, 156.05923461914062, 87.8956298828125, 276.33441162109375, -11.5894775390625, -71.09811401367188, 6.104583740234375, 293.9652099609375, 11.935546875, 182.35821533203125, -67.10792541503906, 224.88821411132812, -37.386383056640625, 216.83355712890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000468.npy"}
{"epoch": 0.9801047120418848, "step": 469, "batch_size": 128, "mean": 75.91983032226562, "std": 105.28402709960938, "min": -207.6033935546875, "p10": -61.76966247558593, "median": 60.806549072265625, "p90": 217.15516967773436, "max": 307.9566345214844, "pos_frac": 0.7890625, "sample": [49.032501220703125, 239.404296875, 0.0, 19.282901763916016, 228.6866455078125, 216.614990234375, 34.52910232543945, 106.34501647949219, 201.27154541015625, 186.50665283203125, 27.659332275390625, 114.96891784667969, 218.41558837890625, 50.27978515625, 20.620811462402344, 168.3424072265625, -186.9366455078125, 213.09552001953125, 71.85545349121094, 46.78497314453125, 5.6360626220703125, 18.47711181640625, 5.731533050537109, 107.610107421875, 168.41702270507812, 76.44694519042969, 53.94024658203125, 74.46879577636719, 238.820556640625, 52.19305419921875, 32.608089447021484, 58.942626953125, 45.4610595703125, 76.29096984863281, -8.7496337890625, 47.09912109375, 194.88140869140625, 50.7216796875, 227.9122314453125, 156.776123046875, -64.76019287109375, 74.08963012695312, 20.872596740722656, -24.96065902709961, 196.93106079101562, 186.15255737304688, -102.48876953125, 9.646804809570312, -86.20547485351562, 121.2296142578125, 37.59008026123047, 53.36526107788086, 75.48605346679688, -23.31983184814453, 103.13473510742188, 307.9566345214844, 207.21383666992188, -14.892471313476562, -207.6033935546875, 10.235340118408203, 45.007301330566406, 257.9000244140625, 59.818695068359375, 156.1480712890625, -101.3402099609375, 21.582809448242188, 59.34880065917969, 117.11190795898438, -2.3200149536132812, -57.86798095703125, 81.6187744140625, 3.4730224609375, -73.98623657226562, 153.46432495117188, 40.24456787109375, 104.34698486328125, 100.91876220703125, 181.93142700195312, 174.54580688476562, -90.01947021484375, 249.47125244140625, 216.5086669921875, -177.36453247070312, -6.296331405639648, 9.053337097167969, 59.601043701171875, 165.106201171875, 129.69131469726562, 88.32569885253906, -15.971664428710938, 61.794403076171875, 206.623291015625, 171.91766357421875, 137.2418212890625, 63.682579040527344, -68.314208984375, 200.31634521484375, -60.488006591796875, 73.38290405273438, 32.48542785644531, 108.73362731933594, -36.4217529296875, 39.561248779296875, 80.96975708007812, 178.44479370117188, 29.215576171875, 129.2510986328125, 95.7427978515625, 17.559539794921875, -134.058349609375, 235.703369140625, -74.71157836914062, 82.87464904785156, 198.62586975097656, 13.203094482421875, 294.56005859375, 175.72308349609375, 224.38458251953125, 256.1617431640625, 18.048980712890625, -10.18853759765625, 168.4786376953125, -33.1343994140625, -72.68934631347656, 62.27586364746094, 58.36810302734375, 270.3435974121094, -18.101043701171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000469.npy"}
{"epoch": 0.9821989528795811, "step": 470, "batch_size": 128, "mean": 74.77116394042969, "std": 102.3546142578125, "min": -200.0211181640625, "p10": -43.911730957031246, "median": 59.72607421875, "p90": 208.46175384521482, "max": 393.6311950683594, "pos_frac": 0.796875, "sample": [-48.02679443359375, 39.041015625, 162.86495971679688, 188.8948974609375, 6.62823486328125, 11.41912841796875, 100.287841796875, -101.32168579101562, 252.59646606445312, 13.759666442871094, -35.32128143310547, 32.44952392578125, 18.810226440429688, 63.779541015625, 74.22027587890625, 127.39785766601562, 192.909912109375, 16.15943145751953, -28.299896240234375, -82.61956787109375, 102.81990051269531, 213.7936248779297, 54.083099365234375, -85.29641723632812, 59.332672119140625, 79.78126525878906, 220.83425903320312, 198.58966064453125, 114.736572265625, -17.459228515625, 118.25108337402344, 260.44659423828125, -74.32452392578125, 41.3546142578125, 1.4628372192382812, 62.66575622558594, 146.34542846679688, 89.83834838867188, 54.374420166015625, -63.43022155761719, 23.960994720458984, -108.22897338867188, -0.051177978515625, 25.31646728515625, 38.062408447265625, 110.78683471679688, -3.11761474609375, 122.56442260742188, 16.951080322265625, 89.49176025390625, 0.064178466796875, 132.6669921875, 226.8676300048828, 290.83026123046875, 34.368568420410156, 254.00144958496094, -24.8072509765625, -131.64495849609375, -7.3692779541015625, 138.74502563476562, 31.6441650390625, 114.9193115234375, 107.25848388671875, 43.578643798828125, 117.61279296875, 49.87786865234375, -42.14813232421875, 2.423858642578125, 63.442474365234375, 237.87539672851562, 184.2303009033203, 28.2877197265625, 283.140625, 81.2242431640625, 40.42565155029297, 65.09857177734375, 26.495010375976562, -37.14894104003906, 184.48895263671875, 13.715240478515625, 157.4595947265625, 25.552139282226562, -74.7054443359375, 35.63441467285156, 74.63589477539062, 132.16070556640625, 25.598134994506836, 18.069992065429688, 206.17666625976562, 317.2731018066406, -67.13496398925781, 393.6311950683594, 172.67376708984375, -70.62203979492188, 72.883544921875, -6.25531005859375, 43.893585205078125, 166.76641845703125, 128.327880859375, 120.97454833984375, 14.163970947265625, 233.5060272216797, 128.13088989257812, 8.894493103027344, 139.63040161132812, 108.26296997070312, 193.555908203125, 132.43014526367188, 123.30038452148438, 7.39031982421875, 142.648193359375, -14.06494140625, 18.47894287109375, 52.95111083984375, 205.4280242919922, 99.35635375976562, 15.572906494140625, -200.0211181640625, 60.119476318359375, -87.43426513671875, 111.10965728759766, 175.0939483642578, -20.613067626953125, 12.90179443359375, 195.53887939453125, 190.2313690185547, 264.52349853515625, -23.10107421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000470.npy"}
{"epoch": 0.9842931937172775, "step": 471, "batch_size": 128, "mean": 64.23965454101562, "std": 111.40602111816406, "min": -190.44346618652344, "p10": -49.06919937133789, "median": 53.77442932128906, "p90": 185.2703659057617, "max": 646.540771484375, "pos_frac": 0.75, "sample": [51.20121765136719, 121.57530212402344, 41.639129638671875, 110.51992797851562, 125.11947631835938, -5.8182373046875, 11.03338623046875, 70.96102905273438, 64.68896484375, 100.39804077148438, -20.038612365722656, 19.359954833984375, 124.43478393554688, 1.7607650756835938, 83.260986328125, 19.36865234375, -121.79254150390625, -3.716592788696289, -6.9521331787109375, 12.322479248046875, 309.74456787109375, 4.581703186035156, -77.25361633300781, -48.59656524658203, 107.1473388671875, -2.21380615234375, -80.400390625, -7.962390899658203, 198.77471923828125, -160.08242797851562, 140.36785888671875, -34.79170227050781, 111.07591247558594, 241.1629638671875, 67.2640380859375, 39.454925537109375, -2.2364501953125, 89.87728118896484, 64.25816345214844, 4.17431640625, 23.2037353515625, 35.44366455078125, 31.248504638671875, 0.1561279296875, 207.08892822265625, 71.7027587890625, -90.12469482421875, 177.42010498046875, -129.506103515625, 0.05841064453125, 68.71737670898438, 49.47456359863281, 177.67921447753906, 30.249404907226562, 186.4171905517578, 28.73443603515625, 92.021484375, 162.7200164794922, 106.43508911132812, 76.16104125976562, 245.29345703125, 0.0, 62.209503173828125, -10.712471008300781, 4.447713851928711, 29.495346069335938, -190.44346618652344, -21.290252685546875, 26.362457275390625, 6.596923828125, 139.80294799804688, 154.25726318359375, -50.17201232910156, 4.23681640625, 397.09893798828125, -13.511749267578125, 158.4119110107422, 6.558494567871094, -0.32672882080078125, 82.21041870117188, -8.297882080078125, 59.1494140625, -116.79608154296875, 44.91206359863281, 61.3397216796875, 155.27337646484375, 94.65945434570312, 178.32705688476562, 105.15203857421875, -4.3900146484375, 176.69190979003906, 159.32293701171875, 252.2423095703125, 299.1271667480469, 266.6649169921875, -41.744171142578125, 82.88623046875, 149.57948303222656, -22.23023223876953, 39.80987548828125, 10.370706558227539, 117.11042785644531, 14.47393798828125, 194.608642578125, -82.03054809570312, 81.88035583496094, 129.1923828125, 135.68223571777344, 218.16961669921875, 646.540771484375, 3.4892044067382812, -37.387725830078125, 64.94528198242188, 56.34764099121094, 154.70974731445312, 9.26580810546875, 20.578094482421875, 58.273529052734375, 60.480010986328125, 184.77886962890625, 83.33815002441406, 69.96966552734375, 14.36517333984375, 156.38076782226562, 134.1177978515625, -72.407470703125, -142.02801513671875, -93.71826171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000471.npy"}
{"epoch": 0.9863874345549738, "step": 472, "batch_size": 128, "mean": 71.73892211914062, "std": 95.981201171875, "min": -132.61917114257812, "p10": -37.76636505126953, "median": 55.52717590332031, "p90": 191.66537322998047, "max": 371.999267578125, "pos_frac": 0.765625, "sample": [276.93804931640625, 46.442962646484375, -41.5848274230957, 3.2119903564453125, 44.520050048828125, 309.5535888671875, -52.57814025878906, -37.019866943359375, -97.5518569946289, 66.20309448242188, -36.10736083984375, 142.6009979248047, 55.74139404296875, 53.280029296875, 183.30056762695312, 173.31356811523438, 116.48968505859375, 149.18475341796875, 78.20032501220703, 92.25863647460938, 311.2965393066406, -9.91668701171875, -36.81292724609375, 153.90245056152344, -35.863372802734375, 243.54736328125, 273.0729675292969, 142.72747802734375, -2.209737777709961, 21.163421630859375, 46.220062255859375, -66.71109008789062, 15.141706466674805, 22.25823974609375, 56.289756774902344, 42.184295654296875, 184.34849548339844, 138.93539428710938, 2.27587890625, 2.3049697875976562, -49.325469970703125, 58.21636962890625, 128.4860076904297, 50.90380859375, 192.05772399902344, -72.45037841796875, 46.037139892578125, 181.53726196289062, 18.375343322753906, 6.84913444519043, 23.412155151367188, 6.353427886962891, 100.52740478515625, -4.846870422363281, 149.12716674804688, 154.1772918701172, 33.018035888671875, 191.49722290039062, 213.44589233398438, 274.83087158203125, 1.3436431884765625, 371.999267578125, -18.909393310546875, 95.86236572265625, -0.2205810546875, -0.18902587890625, 76.32672119140625, 5.782196044921875, 81.2347412109375, 77.24127197265625, 57.239501953125, 38.25213623046875, -28.14269256591797, 150.3055419921875, 118.1450424194336, -57.90740966796875, 313.40399169921875, 70.04306030273438, 12.8892822265625, 133.5880584716797, 45.74462890625, 234.51669311523438, 66.85305786132812, 125.69207763671875, -0.727294921875, 157.35769653320312, -6.9708251953125, 12.43634033203125, 45.41435241699219, -132.61917114257812, 80.51470947265625, -52.70904541015625, 166.55206298828125, 130.06683349609375, -71.32785034179688, 55.312957763671875, 217.39443969726562, 112.3140640258789, -65.94265747070312, 162.6033172607422, 56.340965270996094, 46.76177978515625, 29.12493896484375, 37.46098327636719, 3.9562759399414062, 150.9454345703125, 120.09356689453125, -66.79456329345703, 52.932525634765625, 182.73486328125, -22.366783142089844, -22.736392974853516, 131.84507751464844, -13.956441879272461, 214.54705810546875, 47.82765197753906, -21.47673225402832, 67.50796508789062, 109.37565612792969, 87.2459716796875, 114.17713928222656, 11.837295532226562, 88.34844970703125, 76.81661987304688, 72.29644775390625, 140.185791015625, -39.50819396972656, 13.512161254882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000472.npy"}
{"epoch": 0.9884816753926702, "step": 473, "batch_size": 128, "mean": 75.92730712890625, "std": 100.27095031738281, "min": -263.47906494140625, "p10": -41.277008819580075, "median": 67.39810943603516, "p90": 198.2524810791015, "max": 376.0823974609375, "pos_frac": 0.7890625, "sample": [15.780082702636719, 183.34780883789062, 64.69259643554688, -54.20758819580078, 107.59848022460938, 100.24542236328125, -7.71124267578125, -20.755294799804688, -24.06536865234375, -87.68758392333984, 187.84405517578125, 172.94091796875, 96.88687133789062, 103.2679443359375, -8.605079650878906, 96.2469482421875, 5.580718994140625, 53.309478759765625, 215.62310791015625, 187.49012756347656, 86.7828140258789, 83.66607666015625, -65.74999237060547, -40.28327941894531, 10.734817504882812, -23.860122680664062, 214.15884399414062, 140.3068389892578, 24.818641662597656, 162.1325225830078, 81.17401123046875, 23.909576416015625, 43.71553039550781, -7.4132080078125, 1.24298095703125, 34.4361572265625, 17.784103393554688, -53.13194274902344, 210.82550048828125, 69.41192626953125, 20.193389892578125, 157.02932739257812, 217.09677124023438, 224.21328735351562, -10.410329818725586, 243.31417846679688, -32.50006103515625, 298.1322021484375, 4.03076171875, 165.4793701171875, 108.16461181640625, 188.6759796142578, 230.299072265625, 95.28441619873047, 150.2060089111328, 0.0, 108.52731323242188, 151.8431396484375, -263.47906494140625, 11.599916458129883, 157.25234985351562, -178.44065856933594, 69.05514526367188, 162.11085510253906, 107.42718505859375, 45.8450927734375, 119.94882202148438, 14.112281799316406, 65.74107360839844, 97.329345703125, 83.75552368164062, 27.30063819885254, 55.0283203125, 22.6126708984375, 35.763214111328125, 112.84164428710938, -12.44964599609375, 42.014678955078125, 219.24713134765625, -146.62307739257812, 42.344329833984375, 75.6217041015625, -6.335168838500977, 189.048583984375, 127.75567626953125, 45.007415771484375, -69.9495849609375, 100.80511474609375, 168.84939575195312, -27.8380126953125, 85.07659912109375, 182.0574951171875, 259.16949462890625, 243.84417724609375, 153.61495971679688, 113.66878509521484, -103.3609619140625, 9.32557487487793, -43.59571075439453, 64.47216796875, 62.166900634765625, -54.97749328613281, -117.86154174804688, 376.0823974609375, 137.02484130859375, 54.92546081542969, 148.8264923095703, 166.49749755859375, 33.526214599609375, 153.85366821289062, 189.7052001953125, 1.320770263671875, 154.7162628173828, 65.73211669921875, 57.2196044921875, 192.86404418945312, 42.538787841796875, 24.57453727722168, 180.3499755859375, 54.25189208984375, 23.87853240966797, 104.93112182617188, -43.71006774902344, -27.617042541503906, 230.43231201171875, 161.61557006835938, 107.89900207519531, 58.2890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000473.npy"}
{"epoch": 0.9905759162303664, "step": 474, "batch_size": 128, "mean": 61.01691818237305, "std": 90.98340606689453, "min": -175.79052734375, "p10": -48.53290405273437, "median": 49.056190490722656, "p90": 175.92079467773436, "max": 350.2206115722656, "pos_frac": 0.7734375, "sample": [18.872894287109375, -4.52593994140625, 350.2206115722656, -8.3870849609375, 29.893592834472656, 74.97409057617188, 42.16546630859375, 45.84722900390625, 0.3288726806640625, 16.704803466796875, 11.67059326171875, 172.11135864257812, 22.412994384765625, 25.586074829101562, 35.025054931640625, 24.07806396484375, 137.4272918701172, 48.70381164550781, 241.50253295898438, 118.5338134765625, 116.45221710205078, -10.351806640625, 65.32876586914062, 132.47219848632812, -41.513458251953125, -17.8333740234375, 28.444854736328125, 158.53236389160156, 39.18048095703125, 148.0986328125, 22.280517578125, 72.80374145507812, 0.6917686462402344, -7.83905029296875, 39.52783203125, 252.79901123046875, 177.36236572265625, 26.336389541625977, -23.044288635253906, 174.27777099609375, 118.36611938476562, 174.54356384277344, 49.4085693359375, 29.623138427734375, -52.48554992675781, 138.47451782226562, 123.36371612548828, 30.64788818359375, -86.59881591796875, 65.831787109375, 231.357666015625, 40.42951965332031, 175.9375, 138.0428466796875, 196.2232666015625, 61.251739501953125, 36.61531066894531, -175.79052734375, -74.00224304199219, 103.3125, 93.3479232788086, 141.19192504882812, 60.0101318359375, 54.15721130371094, 84.16206359863281, 155.1087646484375, 103.33454895019531, 118.90023803710938, 51.8616943359375, 230.19876098632812, 188.24136352539062, 276.2308349609375, -89.80741882324219, -102.10079193115234, -9.630859375, 60.237579345703125, 11.242942810058594, -75.83419036865234, 183.16177368164062, 121.53324127197266, -38.19867706298828, 148.41094970703125, 92.06381225585938, -33.842620849609375, 194.83578491210938, 176.16537475585938, 105.39129638671875, 85.35842895507812, -71.68611145019531, 132.6719970703125, 85.55308532714844, -118.534423828125, 16.610488891601562, 89.61956787109375, 27.461669921875, -166.36978149414062, -115.0439453125, -73.53250122070312, 11.083686828613281, -105.66392517089844, 93.41659545898438, 28.464759826660156, 0.0, -3.3636322021484375, -46.83891296386719, 103.7659912109375, 146.27978515625, 75.85903930664062, 62.81584167480469, -20.68084716796875, 37.210723876953125, 175.91363525390625, 160.697998046875, 60.091522216796875, 163.03033447265625, -13.121368408203125, 151.56320190429688, 56.94706726074219, 26.19537353515625, 86.3021240234375, 30.66729736328125, -23.6632080078125, 97.28892517089844, 33.631412506103516, 21.36767578125, 48.42633056640625, 43.85693359375, 34.43341064453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000474.npy"}
{"epoch": 0.9926701570680628, "step": 475, "batch_size": 128, "mean": 60.584022521972656, "std": 95.22283935546875, "min": -161.8712158203125, "p10": -41.303149414062496, "median": 39.64191436767578, "p90": 197.54149627685547, "max": 344.472412109375, "pos_frac": 0.765625, "sample": [220.46990966796875, 164.77479553222656, 0.1832275390625, 35.347251892089844, -32.566375732421875, 142.44638061523438, 85.1143798828125, 63.948482513427734, 235.941650390625, 180.24468994140625, 9.596099853515625, 7.8022918701171875, 44.22517395019531, -50.891845703125, 44.430572509765625, 64.20551300048828, 77.57412719726562, 39.17619323730469, 59.94940948486328, 139.08615112304688, 143.39263916015625, 200.26382446289062, 218.77029418945312, 91.65376281738281, 38.65299987792969, 158.1605224609375, 77.42120361328125, -27.408050537109375, 25.869434356689453, -55.3001708984375, 119.37310791015625, -92.91091918945312, 150.83392333984375, -27.441650390625, -161.8712158203125, 238.7972869873047, -136.75625610351562, 128.94647216796875, -20.25262451171875, 187.5179443359375, 171.39801025390625, -35.898040771484375, 140.1551055908203, 197.4516143798828, 153.79156494140625, 121.18994140625, -74.88290405273438, 66.841796875, -16.323532104492188, 53.5023193359375, 18.638427734375, 197.751220703125, -133.89524841308594, -35.71630859375, 147.78848266601562, 7.123931884765625, 112.15251159667969, -39.52668762207031, 124.27352905273438, 9.25653076171875, 103.86190795898438, 32.63142395019531, -65.00445556640625, 21.60504150390625, -99.47758483886719, -94.47930908203125, -83.47929382324219, -29.002716064453125, 20.823486328125, 60.42604064941406, 142.23001098632812, 38.8343505859375, 183.3642578125, 116.14842224121094, 220.74075317382812, -0.904205322265625, 191.67648315429688, 20.076160430908203, 35.26378631591797, 23.866119384765625, 28.12670135498047, 23.2264404296875, 73.26422119140625, 22.63116455078125, 10.857421875, 46.5968017578125, -104.41990661621094, 40.107635498046875, 64.96295166015625, 72.79277038574219, 12.586204528808594, 99.59807586669922, 326.8489990234375, 87.0184326171875, 29.864871978759766, 219.790283203125, -22.930389404296875, -7.094079971313477, 36.04035949707031, 104.51783752441406, 344.472412109375, -21.30010986328125, 35.63383483886719, -4.1302490234375, 243.77301025390625, 1.9589004516601562, 32.77159118652344, 20.53515625, 102.77642822265625, -19.27165985107422, 52.934425354003906, -45.44822692871094, -7.729454040527344, 286.4001159667969, 23.619964599609375, 22.682579040527344, 16.825927734375, 40.8844108581543, 75.21240234375, -9.438156127929688, 58.6566162109375, 134.40182495117188, 12.172576904296875, 214.78616333007812, 12.660408020019531, 33.488189697265625, 59.54863739013672, 58.446502685546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000475.npy"}
{"epoch": 0.9947643979057592, "step": 476, "batch_size": 128, "mean": 65.94429016113281, "std": 110.3356704711914, "min": -198.28533935546875, "p10": -58.60287857055664, "median": 50.10256576538086, "p90": 193.61715698242188, "max": 510.8563232421875, "pos_frac": 0.7734375, "sample": [54.92732238769531, -114.1937255859375, -69.09925842285156, -86.90164184570312, 7.080596923828125, 100.96543884277344, 142.70480346679688, 22.946624755859375, 21.1590576171875, 142.64529418945312, 139.9669189453125, -53.803138732910156, 86.2246322631836, -143.46856689453125, 35.99125671386719, 193.0926513671875, 93.66668701171875, 166.69561767578125, 106.10491943359375, 27.497802734375, -40.000732421875, 89.85318756103516, 19.17108154296875, -51.45366668701172, 38.09275817871094, 167.94082641601562, 65.78317260742188, 20.30224609375, 168.56832885742188, 25.35614013671875, 16.819610595703125, -27.86944580078125, 21.9453125, 135.04904174804688, 5.579399108886719, 35.086143493652344, 145.798828125, -11.173294067382812, 43.291015625, -53.99517822265625, -11.835472106933594, 30.9271240234375, 37.412994384765625, 53.1580810546875, -58.28607177734375, 12.9072265625, -12.482593536376953, 236.99349975585938, 68.53428649902344, 91.74285888671875, 58.29052734375, -51.387420654296875, -74.696044921875, 54.849700927734375, -5.91949462890625, 196.46798706054688, 33.90412521362305, 65.95048522949219, 5.178253173828125, 191.53662109375, 331.92779541015625, -15.766632080078125, 140.69017028808594, 43.79559326171875, 36.920074462890625, 73.74876403808594, 100.67288208007812, 40.89691162109375, -40.1759033203125, -114.023681640625, 69.04920959472656, 31.24267578125, 139.9701385498047, 28.513870239257812, 510.8563232421875, -59.34209442138672, 66.4366455078125, -39.38079833984375, 156.63848876953125, 37.294189453125, 205.18182373046875, 81.64120483398438, 69.33857727050781, 4.569309234619141, 47.04705047607422, 82.05592346191406, -71.43490600585938, 41.42742156982422, -131.954833984375, 119.85888671875, 0.05096435546875, 21.16815185546875, 187.844970703125, 122.34561157226562, -119.48001098632812, 233.70484924316406, 292.78094482421875, 230.87689208984375, 117.8936538696289, 326.710205078125, 152.0161895751953, 108.34249877929688, 148.1279296875, 190.83181762695312, -156.13348388671875, 234.25643920898438, 44.69195556640625, 61.571441650390625, 127.17779541015625, 138.10670471191406, -22.592620849609375, 254.28187561035156, 81.46378326416016, -198.28533935546875, 69.02056884765625, 121.14898681640625, -25.156044006347656, 399.1620178222656, 39.879638671875, 167.51861572265625, 159.5306396484375, 2.4913177490234375, 113.0012435913086, -108.51504516601562, 3.80218505859375, 194.84100341796875, 34.0032958984375, 93.09745025634766], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000476.npy"}
{"epoch": 0.9968586387434555, "step": 477, "batch_size": 128, "mean": 68.60246276855469, "std": 116.40129089355469, "min": -261.3890380859375, "p10": -63.451699829101564, "median": 58.935890197753906, "p90": 215.38267517089844, "max": 363.69305419921875, "pos_frac": 0.703125, "sample": [-115.94085693359375, -37.2008056640625, 109.6656494140625, -55.16845703125, -35.461700439453125, 279.1656494140625, -5.221527099609375, 34.194549560546875, 211.29541015625, 173.80389404296875, -245.20716857910156, 37.391387939453125, 181.7418212890625, -60.578277587890625, -45.777587890625, 3.7837066650390625, 61.21185302734375, 91.71617889404297, 272.14752197265625, -0.066192626953125, -4.830322265625, 67.6739501953125, 94.5965576171875, 148.2144317626953, -96.71369934082031, 144.2240447998047, 43.93255615234375, 24.94683837890625, 206.57740783691406, 269.6765441894531, 17.321212768554688, 180.66378784179688, 202.98953247070312, 61.319488525390625, -136.95751953125, 31.820358276367188, 167.45489501953125, 45.264976501464844, -87.48883056640625, 0.0, 70.6876220703125, -59.794921875, 43.001258850097656, 217.41592407226562, -261.3890380859375, 192.1849365234375, 33.108245849609375, 37.883453369140625, 59.2659912109375, -27.713531494140625, 28.958816528320312, 215.0828857421875, -163.48915100097656, 165.71194458007812, -69.23829650878906, -146.63507080078125, -50.5848388671875, -50.43865203857422, 182.45472717285156, 70.5283432006836, 57.86724853515625, -78.31431579589844, 127.82891845703125, 65.12582397460938, -74.85348510742188, 6.1371307373046875, 1.2788238525390625, 216.08218383789062, 56.363128662109375, 99.23709869384766, -84.25216674804688, -42.88862609863281, 304.4722900390625, 120.67721557617188, 207.05380249023438, -12.607929229736328, 146.20187377929688, 99.42886352539062, 112.62158203125, 204.89703369140625, 95.13044738769531, 51.5169677734375, 363.69305419921875, 198.12356567382812, 131.6572265625, 10.657379150390625, -61.6192626953125, -8.4412841796875, 119.80548858642578, 66.10498046875, -23.341737747192383, 51.193084716796875, 204.3310546875, 129.54664611816406, 137.2445831298828, 76.243408203125, -41.005950927734375, 141.26358032226562, -7.10101318359375, -10.1383056640625, 82.03620147705078, -63.97242736816406, 165.03350830078125, 246.303955078125, -47.59629821777344, 225.51092529296875, 168.15301513671875, 173.72381591796875, 109.3740234375, 173.4434814453125, 307.18170166015625, -61.730377197265625, -63.22853088378906, 15.142684936523438, 4.838249206542969, 58.60578918457031, 223.17897033691406, 133.58087158203125, 58.08404541015625, 293.38525390625, 45.503700256347656, 245.4957275390625, 7.754913330078125, 170.43685913085938, -20.844200134277344, 128.9365234375, 92.58224487304688, 29.798309326171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-ultrafeedback-8xh200-20260410-155037/margin_logs/step_0000477.npy"}