Model: W-61/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312 Source: Original Platform
682 lines
1.1 MiB
682 lines
1.1 MiB
{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000001.npy"}
|
|
{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000002.npy"}
|
|
{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": -0.01640373468399048, "std": 0.33150625228881836, "min": -0.82525634765625, "p10": -0.47623100280761715, "median": -0.032321929931640625, "p90": 0.46041240692138685, "max": 0.7672195434570312, "pos_frac": 0.46875, "sample": [-0.2111034393310547, -0.44612884521484375, -0.00292205810546875, 0.7672195434570312, -0.2964191436767578, 0.008512496948242188, 0.2647132873535156, 0.013631820678710938, -0.1563262939453125, 0.009889602661132812, -0.82525634765625, 0.1518840789794922, -0.1895904541015625, -0.329742431640625, -0.08674240112304688, -0.50775146484375, 0.11066436767578125, -0.11901473999023438, 0.214202880859375, 0.08998489379882812, -0.6874618530273438, -0.04742431640625, 0.7362594604492188, -0.1116180419921875, 0.32257080078125, 0.05938720703125, 0.11513137817382812, 0.4285144805908203, 0.18306350708007812, -0.34081268310546875, 0.5052871704101562, 0.62109375, 0.15777206420898438, -0.051849365234375, 0.06997108459472656, -0.0801239013671875, -0.499176025390625, -0.20870208740234375, -0.08841705322265625, -0.09004974365234375, -0.190826416015625, 0.10590362548828125, -0.4908599853515625, -0.02978515625, 0.14806365966796875, -0.10419464111328125, -0.03485870361328125, 0.5008926391601562, -0.4891319274902344, -0.10234832763671875, 0.18095970153808594, -0.3004341125488281, 0.18291091918945312, 0.06221771240234375, -0.105560302734375, 0.27085113525390625, 0.47408294677734375, -0.38299560546875, -0.2723388671875, 0.6761932373046875, 0.012342453002929688, 0.078887939453125, -0.0719146728515625, -0.6210174560546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000003.npy"}
|
|
{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": 0.0101853609085083, "std": 0.40870770812034607, "min": -1.098907470703125, "p10": -0.48458175659179686, "median": 0.02813720703125, "p90": 0.5255672454833985, "max": 0.9482345581054688, "pos_frac": 0.53125, "sample": [-0.4273338317871094, -0.22176742553710938, 0.10195159912109375, -0.05615234375, -0.38072967529296875, -0.2880859375, 0.8399810791015625, -1.098907470703125, -0.2165679931640625, -0.4877471923828125, -0.09852027893066406, 0.5303840637207031, 0.09703826904296875, 0.37158203125, 0.0494842529296875, -0.08495903015136719, -0.528533935546875, -0.20465469360351562, -0.08935165405273438, 0.08026313781738281, -0.45526885986328125, 0.0806427001953125, 0.138702392578125, -0.446441650390625, 0.390655517578125, -0.2508087158203125, 0.14089202880859375, 0.44715118408203125, -0.06812095642089844, -0.84954833984375, -0.02581024169921875, -0.08330535888671875, -0.2428741455078125, 0.43235015869140625, 0.023534774780273438, 0.38907623291015625, -0.05751800537109375, 0.7156219482421875, 0.3605537414550781, 0.1596221923828125, -0.47719573974609375, -0.18732452392578125, 0.29753875732421875, 0.7125701904296875, 0.103973388671875, 0.0317535400390625, 0.2245941162109375, 0.283355712890625, -0.09208297729492188, -0.5011672973632812, 0.5593852996826172, 0.719696044921875, 0.3852806091308594, -0.7695693969726562, 0.0245208740234375, 0.08063316345214844, 0.9482345581054688, -0.1696624755859375, 0.04378509521484375, 0.16285324096679688, 0.5143280029296875, 0.06553459167480469, -0.4234161376953125, -0.572235107421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000004.npy"}
|
|
{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": 0.02979910373687744, "std": 0.43392759561538696, "min": -1.1028594970703125, "p10": -0.48680229187011714, "median": 0.011505126953125, "p90": 0.46049728393554695, "max": 1.4181747436523438, "pos_frac": 0.515625, "sample": [-0.14392852783203125, 0.381744384765625, -0.22560882568359375, -0.24718475341796875, 0.7384109497070312, -0.557861328125, 0.3450736999511719, 0.01520538330078125, -0.1266040802001953, -0.123291015625, -0.42937469482421875, 0.441650390625, 0.09767723083496094, 0.15126800537109375, -0.17428970336914062, 0.1509552001953125, -0.23661041259765625, -0.4689674377441406, 0.2988395690917969, -0.5326080322265625, 0.15410995483398438, 0.19065093994140625, -0.09100341796875, 0.46857452392578125, 0.05630302429199219, -0.2080078125, 1.1636581420898438, 0.31301116943359375, -0.244384765625, 0.04825592041015625, -0.04732322692871094, 0.2502326965332031, 0.6631622314453125, 0.08171272277832031, -0.0837860107421875, -0.3185272216796875, 0.48700714111328125, 0.24405288696289062, 0.01763153076171875, 0.42409515380859375, -0.02172088623046875, 0.15001678466796875, 0.29265594482421875, -1.1028594970703125, 1.4181747436523438, -0.3079986572265625, -0.07465362548828125, -0.133514404296875, -0.49444580078125, -0.797515869140625, -0.15690994262695312, -0.6249332427978516, 0.198638916015625, -0.26021575927734375, -0.08252334594726562, 0.00780487060546875, 1.065704345703125, -0.10533905029296875, -0.5180511474609375, 0.4348182678222656, -0.41757965087890625, 0.02445220947265625, 0.2408905029296875, 0.24832534790039062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000005.npy"}
|
|
{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": 0.0043981969356536865, "std": 0.3865034878253937, "min": -0.95294189453125, "p10": -0.5006668090820312, "median": 0.041072845458984375, "p90": 0.3853317260742188, "max": 1.2647705078125, "pos_frac": 0.546875, "sample": [-0.32056427001953125, 0.18107986450195312, 0.0026073455810546875, 0.1283416748046875, 0.0979461669921875, 0.3321533203125, -0.7009315490722656, -0.4493598937988281, 0.382843017578125, -0.56402587890625, 0.5866889953613281, 0.24840545654296875, -0.95294189453125, 0.2874317169189453, -0.18344497680664062, -0.8419647216796875, -0.089691162109375, -0.24072265625, 0.23504257202148438, 0.03292083740234375, -0.8724365234375, -0.6091880798339844, 0.049224853515625, -0.11350250244140625, -0.11967658996582031, -0.093994140625, 0.3863983154296875, 0.11908721923828125, 0.21823883056640625, -0.14211654663085938, 0.00116729736328125, 0.2959423065185547, 0.6786117553710938, -0.059162139892578125, -0.0280303955078125, 0.224029541015625, -0.3869781494140625, -0.3831939697265625, 0.14795684814453125, 0.28681182861328125, -0.160980224609375, 0.24517822265625, 0.40395355224609375, 0.1644439697265625, 0.177886962890625, 0.24161338806152344, 0.3327770233154297, 0.2385425567626953, 0.422637939453125, -0.4362335205078125, -0.19673919677734375, -0.04328155517578125, 0.1487884521484375, 0.15493011474609375, 0.2411956787109375, -0.10338592529296875, 0.17575454711914062, -0.2304534912109375, -0.17218017578125, 0.5779800415039062, -0.1912841796875, -0.5226554870605469, 1.2647705078125, -0.2227783203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000006.npy"}
|
|
{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": 0.01658591628074646, "std": 0.4097239673137665, "min": -0.9780502319335938, "p10": -0.49665374755859376, "median": 0.03392601013183594, "p90": 0.4979003906250001, "max": 1.286529541015625, "pos_frac": 0.546875, "sample": [-0.08548736572265625, -0.39537811279296875, 0.07315444946289062, 0.06892776489257812, -0.9780502319335938, 0.405242919921875, -0.06476974487304688, 0.017822265625, -0.5288543701171875, 0.23825836181640625, -0.384185791015625, 0.6207046508789062, -0.1777496337890625, 0.0109100341796875, 0.3217010498046875, 0.4531402587890625, 0.13829803466796875, -0.1801910400390625, -0.12461280822753906, -0.402252197265625, -0.197601318359375, 0.1327667236328125, 0.5052223205566406, 0.18609237670898438, 0.3572998046875, -0.10807037353515625, -0.24482345581054688, -0.01988983154296875, 0.08131027221679688, 0.041748046875, 0.16432571411132812, 0.039699554443359375, 0.2659721374511719, 0.287445068359375, 0.6437530517578125, -0.316650390625, -0.00302886962890625, 0.07382965087890625, -0.704010009765625, -0.5416336059570312, -0.570648193359375, -0.481475830078125, 0.400146484375, 0.35463714599609375, -0.07279205322265625, 0.106109619140625, 0.4808158874511719, 0.1339874267578125, 0.0281524658203125, -0.29206085205078125, 1.286529541015625, -0.31897735595703125, -0.5031585693359375, 1.1160736083984375, 0.050018310546875, 0.04727935791015625, -0.8425521850585938, 0.5114364624023438, 0.5173835754394531, -0.21199798583984375, 0.2697601318359375, -0.2856597900390625, -0.286163330078125, -0.0457305908203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000007.npy"}
|
|
{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.028907448053359985, "std": 0.38289621472358704, "min": -1.013671875, "p10": -0.6849678039550781, "median": 0.016452789306640625, "p90": 0.4017358779907227, "max": 0.6231536865234375, "pos_frac": 0.53125, "sample": [0.0328521728515625, 0.4611053466796875, 0.01796722412109375, -0.008819580078125, -0.6683425903320312, -0.03575325012207031, 0.3965435028076172, 0.6231536865234375, 0.0149383544921875, 0.02257537841796875, -0.07465744018554688, 0.2729301452636719, 0.23681640625, 0.14670562744140625, 0.09227752685546875, 0.2053375244140625, -0.08277130126953125, -0.026882171630859375, 0.3332366943359375, -0.0292205810546875, 0.23574066162109375, -0.7084808349609375, -0.064300537109375, 0.21820068359375, -0.025579452514648438, -0.17706298828125, 0.0821685791015625, 0.3417072296142578, 0.0662689208984375, -0.7982025146484375, 0.29364013671875, -0.6920928955078125, 0.06693267822265625, 0.5344696044921875, 0.1441650390625, 0.149169921875, -0.255706787109375, 0.0915679931640625, -0.001617431640625, 0.379852294921875, -0.24546432495117188, -0.6542892456054688, -0.1901702880859375, -0.2783660888671875, -0.28334808349609375, -1.013671875, -1.0123443603515625, 0.403961181640625, 0.23241424560546875, 0.29299163818359375, -0.8067207336425781, -0.731689453125, -0.08203125, -0.18866729736328125, 0.5494003295898438, -0.18193817138671875, -0.277069091796875, -0.30187225341796875, 0.0109405517578125, -0.227264404296875, 0.14850807189941406, 0.49625396728515625, 0.17012786865234375, 0.5093994140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000008.npy"}
|
|
{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.011951416730880737, "std": 0.42925095558166504, "min": -0.7931365966796875, "p10": -0.5231101989746094, "median": -0.0162506103515625, "p90": 0.5921791076660157, "max": 1.1223907470703125, "pos_frac": 0.484375, "sample": [-0.33533477783203125, -0.48809051513671875, 0.7401809692382812, 0.5734710693359375, 0.12963104248046875, -0.5185012817382812, 0.6001968383789062, -0.203765869140625, -0.08232879638671875, 0.0623626708984375, -0.711273193359375, 0.182037353515625, -0.071563720703125, 0.004364013671875, -0.2239227294921875, -0.7686767578125, 0.2171630859375, -0.4033164978027344, -0.459136962890625, -0.6567611694335938, -0.7931365966796875, 0.1706562042236328, -0.23957061767578125, 0.05313873291015625, -0.10013580322265625, -0.09357452392578125, 0.11075973510742188, 0.37469482421875, 0.06093406677246094, 0.9036140441894531, 0.042133331298828125, 0.40459632873535156, -0.100494384765625, -0.02325439453125, -0.5252513885498047, -0.3830413818359375, -0.050289154052734375, -0.07569313049316406, 0.78515625, 0.08303451538085938, -0.47925567626953125, -0.597808837890625, -0.52508544921875, 0.09171295166015625, 0.089935302734375, -0.09490203857421875, -0.009246826171875, -0.1472930908203125, 0.893890380859375, 0.12850379943847656, -0.0339813232421875, -0.15255355834960938, 0.237548828125, 0.6620349884033203, 0.48407745361328125, 1.1223907470703125, 0.3302154541015625, 0.48126220703125, 0.20923233032226562, 0.45648193359375, -0.3697052001953125, 0.44454193115234375, -0.4300689697265625, -0.218048095703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000009.npy"}
|
|
{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": 0.05922728776931763, "std": 0.4276774227619171, "min": -1.809234619140625, "p10": -0.31093311309814453, "median": 0.028291702270507812, "p90": 0.44323539733886724, "max": 1.3619842529296875, "pos_frac": 0.515625, "sample": [0.1492767333984375, 0.2663116455078125, 0.05120277404785156, -0.10093498229980469, -0.2581672668457031, 0.264129638671875, -0.0627593994140625, 0.575042724609375, 0.5890731811523438, -0.4343719482421875, -0.460205078125, 0.425140380859375, -0.7145843505859375, 0.27740478515625, -0.23386383056640625, 0.2330474853515625, -0.6119117736816406, 0.15674209594726562, 0.12831878662109375, 0.09332466125488281, 0.3520698547363281, 0.6863365173339844, 0.3076934814453125, 0.2527904510498047, -0.14337921142578125, -0.09195709228515625, -0.4576416015625, -0.1806793212890625, 0.3683452606201172, -0.0061359405517578125, 0.160858154296875, 0.4324607849121094, -0.09528350830078125, -0.25492095947265625, -0.1353912353515625, -0.20672607421875, -1.809234619140625, 0.3697013854980469, 0.2397289276123047, -0.06263542175292969, -0.3018474578857422, -0.18224334716796875, 0.17069435119628906, 0.75079345703125, 0.3128662109375, -0.21875762939453125, 0.36798095703125, -0.015399932861328125, 0.4298820495605469, -0.1521759033203125, -0.091827392578125, -0.05943107604980469, 0.11089324951171875, 0.24083709716796875, -0.2130889892578125, -0.1064300537109375, -0.0140533447265625, 0.38431549072265625, 0.44785308837890625, 1.3619842529296875, 0.8225784301757812, -0.31482696533203125, 0.0053806304931640625, -0.0036468505859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000010.npy"}
|
|
{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": 0.04697957634925842, "std": 0.38415780663490295, "min": -0.813446044921875, "p10": -0.48947353363037105, "median": 0.07610893249511719, "p90": 0.46786460876464847, "max": 1.0652923583984375, "pos_frac": 0.546875, "sample": [0.04373931884765625, 0.9750518798828125, 0.39586448669433594, 0.18157386779785156, -0.003025054931640625, -0.0247039794921875, 0.11452484130859375, 0.07768630981445312, -0.11113929748535156, 0.34931182861328125, -0.1962451934814453, 0.1110382080078125, 0.10401153564453125, 0.2312774658203125, -0.813446044921875, 0.08250045776367188, 0.44107818603515625, -0.24770736694335938, -0.229278564453125, 0.08179092407226562, 0.4631061553955078, 0.3653717041015625, 0.090423583984375, -0.14052581787109375, 0.2670135498046875, 0.46509552001953125, -0.055999755859375, 0.09597015380859375, -0.12481689453125, 0.08760833740234375, 0.1780548095703125, -0.50384521484375, 0.099700927734375, -0.177581787109375, 0.4690513610839844, 0.8044281005859375, 0.610748291015625, 1.0652923583984375, -0.6732635498046875, -0.4589214324951172, 0.06176948547363281, 0.21372604370117188, -0.5949134826660156, -0.535858154296875, 0.2673912048339844, -0.013370513916015625, 0.40496826171875, -0.25872230529785156, -0.286224365234375, 0.2828521728515625, -0.2717742919921875, -0.5025672912597656, 0.49764251708984375, -0.5598907470703125, 0.12439727783203125, -0.12111091613769531, -0.352630615234375, -0.1207427978515625, 0.07453155517578125, 0.8794097900390625, -0.23491668701171875, -0.05254554748535156, -0.2314739227294922, -0.1540679931640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000011.npy"}
|
|
{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": 0.03697209060192108, "std": 0.3862837553024292, "min": -1.2092437744140625, "p10": -0.400642967224121, "median": 0.037395477294921875, "p90": 0.4642551422119141, "max": 1.49822998046875, "pos_frac": 0.546875, "sample": [0.5009765625, -0.3280487060546875, 0.2237091064453125, -1.2092437744140625, 0.1812744140625, -0.18491363525390625, 0.02790069580078125, 0.16443252563476562, -0.14470672607421875, 0.4172401428222656, 0.0468902587890625, -0.1834869384765625, -0.20619964599609375, -0.05327606201171875, 0.16124725341796875, 0.25212860107421875, -0.33998870849609375, 0.443023681640625, 0.20025253295898438, -0.33847808837890625, -0.252166748046875, -0.13727569580078125, -0.084259033203125, -0.6483840942382812, -0.4399833679199219, 0.12592697143554688, 0.1318359375, 1.49822998046875, -0.2228717803955078, 0.00455474853515625, -0.4975738525390625, 0.47589111328125, 0.2904510498046875, 0.3227043151855469, 0.31633758544921875, 0.22776222229003906, -0.110198974609375, -0.50762939453125, -0.23465728759765625, -0.18487167358398438, -0.6324615478515625, 0.4502601623535156, -0.4266376495361328, 0.4862632751464844, 0.1915130615234375, 0.10385894775390625, 0.287506103515625, -0.09662628173828125, 0.1368570327758789, 0.47025299072265625, -0.04898643493652344, 0.023736953735351562, -0.1199951171875, 0.3751697540283203, 0.058807373046875, -0.09234619140625, 0.05816650390625, -0.2723674774169922, -0.057621002197265625, -0.03347015380859375, 0.35550689697265625, 0.7174720764160156, 0.5589599609375, 0.16783905029296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000012.npy"}
|
|
{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": 0.0019735991954803467, "std": 0.42141592502593994, "min": -1.0727996826171875, "p10": -0.4444110870361328, "median": -0.037906646728515625, "p90": 0.48707790374755877, "max": 1.5062255859375, "pos_frac": 0.484375, "sample": [-0.1951446533203125, -0.2697868347167969, -0.21967315673828125, -0.2483367919921875, -0.7089767456054688, 0.06937408447265625, 0.6371231079101562, 0.08794021606445312, 0.34210968017578125, -0.22402572631835938, -0.46089935302734375, 0.13953399658203125, -0.6208114624023438, -0.030735015869140625, -0.10294914245605469, 1.5062255859375, -0.229705810546875, -0.4404296875, -0.13364219665527344, -0.324066162109375, 0.13097572326660156, -0.09925079345703125, -0.2651824951171875, 0.11240386962890625, 0.43416404724121094, -0.347320556640625, 0.00920867919921875, -0.13610267639160156, -0.1778717041015625, -0.4224090576171875, -0.4863166809082031, 0.5259933471679688, -0.05609893798828125, 0.24992752075195312, -1.0727996826171875, 0.08640289306640625, -0.19818878173828125, 0.5034961700439453, 0.10600090026855469, 0.017578125, 0.4081306457519531, -0.10482406616210938, 0.56341552734375, 0.20801925659179688, 0.220489501953125, 0.11935806274414062, -0.13698577880859375, 0.037181854248046875, 0.23111724853515625, -0.045078277587890625, -0.59954833984375, 0.5081024169921875, -0.3138275146484375, -0.06905364990234375, -0.36028289794921875, 0.01364898681640625, 0.31551361083984375, 0.0404510498046875, 1.41162109375, -0.4461174011230469, 0.220367431640625, 0.44876861572265625, 0.10567474365234375, -0.13756561279296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000013.npy"}
|
|
{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": 0.005887240171432495, "std": 0.47432461380958557, "min": -1.7574920654296875, "p10": -0.5578521728515624, "median": 0.05684852600097656, "p90": 0.5454355239868165, "max": 0.9806365966796875, "pos_frac": 0.515625, "sample": [-0.702301025390625, -0.06643104553222656, -0.12323188781738281, -0.3801918029785156, 0.8169097900390625, -0.10453033447265625, -0.59210205078125, -0.342376708984375, 0.26978302001953125, 0.28733062744140625, 0.06116485595703125, -0.17327499389648438, -0.298004150390625, 0.9806365966796875, -0.09050369262695312, -0.145599365234375, -0.1278057098388672, -0.18743133544921875, 0.19170761108398438, -0.40328025817871094, 0.58392333984375, -0.0738525390625, -0.3086700439453125, 0.4253387451171875, 0.4251556396484375, -0.17115402221679688, -1.7574920654296875, 0.10682296752929688, 0.4739227294921875, 0.7368316650390625, 0.237548828125, 0.1344146728515625, 0.30023193359375, 0.21122360229492188, 0.2551860809326172, 0.521484375, 0.12491226196289062, 0.091217041015625, 0.23730850219726562, 0.09773445129394531, -0.613494873046875, -0.1748199462890625, -1.1167755126953125, 0.35277557373046875, -0.17748641967773438, 0.052532196044921875, 0.33838653564453125, -0.2690086364746094, -0.477935791015625, -0.6894073486328125, -0.68414306640625, 0.368316650390625, 0.33673095703125, -0.18532943725585938, 0.33313751220703125, 0.8813629150390625, 0.4001617431640625, 0.697418212890625, -0.4406013488769531, -0.301727294921875, 0.5557003021240234, -0.31392860412597656, 0.16382598876953125, -0.18146324157714844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000014.npy"}
|
|
{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": 0.042571812868118286, "std": 0.3996790945529938, "min": -1.4478912353515625, "p10": -0.36165161132812496, "median": 0.03989601135253906, "p90": 0.4236572265625, "max": 1.061248779296875, "pos_frac": 0.59375, "sample": [-0.01983642578125, 1.061248779296875, 0.36983680725097656, -0.13118934631347656, 0.5340576171875, 0.12329673767089844, -0.22081756591796875, -0.59490966796875, 0.343353271484375, 0.24109649658203125, -0.23749542236328125, -0.2243366241455078, 0.22206878662109375, -0.150115966796875, 0.40198326110839844, 0.308135986328125, 0.03014373779296875, -0.04199981689453125, 0.4159393310546875, 0.2230224609375, 0.35283660888671875, -1.4478912353515625, 0.30454254150390625, 0.423614501953125, -0.6949138641357422, 0.18471527099609375, -0.03923797607421875, 0.5951766967773438, -0.7768402099609375, 0.369110107421875, -0.322601318359375, -0.5036239624023438, -0.0906524658203125, 0.007541656494140625, 0.1980438232421875, 0.37091064453125, -0.2776336669921875, 0.6626739501953125, 0.03383636474609375, -0.1868419647216797, -0.09607696533203125, 0.04303550720214844, 0.05637550354003906, 0.03675651550292969, 0.38182830810546875, -0.21947479248046875, 0.13816070556640625, -0.2112598419189453, 0.1972808837890625, 0.00920867919921875, 0.23748016357421875, -0.037387847900390625, -0.2965087890625, 0.0299530029296875, 0.05367279052734375, 0.45801544189453125, 0.7099609375, -0.1402587890625, 0.423675537109375, -0.378387451171875, -0.14821624755859375, -0.8038597106933594, 0.12108230590820312, 0.343292236328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000015.npy"}
|
|
{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.01766011118888855, "std": 0.3466501533985138, "min": -0.86865234375, "p10": -0.3907011032104492, "median": 0.06450271606445312, "p90": 0.39504623413085954, "max": 0.8655319213867188, "pos_frac": 0.546875, "sample": [0.18980979919433594, 0.66357421875, -0.6668243408203125, -0.0475921630859375, 0.17347335815429688, -0.15378189086914062, 0.213409423828125, -0.029193878173828125, -0.24835968017578125, 0.31072235107421875, -0.1658172607421875, 0.41345977783203125, 0.7739486694335938, -0.20687103271484375, 0.1680145263671875, -0.123565673828125, -0.4710960388183594, -0.294830322265625, -0.1444091796875, 0.129486083984375, 0.352081298828125, 0.2570629119873047, 0.17861175537109375, -0.08028411865234375, 0.10701179504394531, -0.5972499847412109, -0.86865234375, -0.10491561889648438, -0.08242225646972656, -0.13831520080566406, 0.2745094299316406, 0.0635986328125, 0.085968017578125, -0.3639488220214844, 0.0897979736328125, 0.299835205078125, 0.45807647705078125, -0.3952484130859375, 0.18021392822265625, 0.06528472900390625, 0.2135162353515625, -0.3683624267578125, 0.1837310791015625, 0.068145751953125, 0.47052764892578125, 0.20203781127929688, 0.11396980285644531, -0.38009071350097656, -0.1209259033203125, 0.063720703125, 0.2963829040527344, 0.073028564453125, -0.5402679443359375, 0.17634201049804688, -0.036029815673828125, -0.3296661376953125, 0.026758193969726562, 0.2249755859375, 0.8655319213867188, 0.760955810546875, -0.0105133056640625, -0.2221527099609375, -0.11468887329101562, -0.751251220703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000016.npy"}
|
|
{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.04995712637901306, "std": 0.3403526842594147, "min": -1.0746307373046875, "p10": -0.38928680419921874, "median": 0.10266304016113281, "p90": 0.46135005950927743, "max": 0.7584075927734375, "pos_frac": 0.5625, "sample": [-0.39589691162109375, 0.11391067504882812, 0.10315704345703125, 0.08905792236328125, -0.4517021179199219, -0.2990150451660156, 0.4117584228515625, -1.0746307373046875, 0.2425994873046875, -0.2827720642089844, 0.264434814453125, 0.4463043212890625, 0.2676239013671875, 0.4098358154296875, -0.05014991760253906, -0.41629791259765625, -0.3270225524902344, -0.06249809265136719, -0.08709335327148438, 0.5214004516601562, -0.151824951171875, 0.10216903686523438, 0.3195533752441406, 0.2023773193359375, -0.37386322021484375, 0.08351898193359375, 0.46779823303222656, 0.02637481689453125, 0.2725067138671875, 0.5368423461914062, -0.17400360107421875, 0.5139389038085938, -0.07216644287109375, -0.20311737060546875, 0.12566375732421875, -0.1335601806640625, -0.2041168212890625, 0.1940593719482422, -0.36452674865722656, 0.2518310546875, 0.7584075927734375, -0.2672615051269531, -0.10518264770507812, 0.17900657653808594, -0.4076385498046875, 0.2266387939453125, 0.2775306701660156, -0.4835968017578125, 0.14487457275390625, -0.02825164794921875, 0.1732940673828125, 0.31543731689453125, 0.4173126220703125, -0.2826385498046875, 0.16281890869140625, -0.1549072265625, 0.69805908203125, -0.15474319458007812, 0.5043487548828125, -0.47869110107421875, 0.37574195861816406, 0.26998138427734375, 0.3222503662109375, -0.10799407958984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000017.npy"}
|
|
{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.0720413327217102, "std": 0.34716373682022095, "min": -0.6702194213867188, "p10": -0.4195545196533203, "median": 0.11382770538330078, "p90": 0.46426315307617194, "max": 0.8337326049804688, "pos_frac": 0.625, "sample": [0.15647506713867188, -0.484649658203125, 0.8337326049804688, -0.6702194213867188, 0.4904441833496094, -0.5107192993164062, -0.2493000030517578, 0.1615447998046875, -0.147064208984375, 0.19940757751464844, -0.65216064453125, 0.06730079650878906, 0.34656524658203125, 0.4149818420410156, -0.3384246826171875, 0.7154769897460938, -0.047672271728515625, -0.013235092163085938, 0.24217605590820312, -0.32990264892578125, 0.028720855712890625, -0.11266326904296875, 0.57470703125, 0.09141731262207031, 0.3049888610839844, 0.19597434997558594, 0.6939010620117188, 0.4122161865234375, -0.0227508544921875, 0.15872955322265625, 0.04019927978515625, 0.2205486297607422, 0.39611053466796875, -0.17760467529296875, 0.1887359619140625, -0.3056068420410156, 0.3214111328125, 0.24409866333007812, -0.13922119140625, 0.251312255859375, 0.2051715850830078, 0.15720367431640625, 0.00252532958984375, 0.45044708251953125, 0.470184326171875, 0.0811614990234375, 0.41426849365234375, -0.37943458557128906, -0.292205810546875, 0.5903282165527344, 0.4344329833984375, 0.05542945861816406, -0.550323486328125, -0.4276237487792969, 0.09030914306640625, 0.13623809814453125, 0.3430023193359375, -0.12582778930664062, -0.5130157470703125, 0.36625099182128906, -0.17663002014160156, -0.400726318359375, -0.0260009765625, 0.15549850463867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000018.npy"}
|
|
{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.03309273719787598, "std": 0.37767040729522705, "min": -0.919586181640625, "p10": -0.41548614501953124, "median": 0.03559398651123047, "p90": 0.4299766540527344, "max": 1.22125244140625, "pos_frac": 0.546875, "sample": [0.3003101348876953, -0.1262950897216797, -0.1925811767578125, -0.1210784912109375, 0.15720367431640625, -0.549346923828125, 0.28835296630859375, 0.057231903076171875, 0.36476707458496094, 0.87115478515625, 0.040771484375, 0.20681381225585938, 0.3739013671875, -0.1371917724609375, 0.37469482421875, -0.7832794189453125, -0.5169677734375, -0.17616653442382812, -0.551971435546875, 0.1899871826171875, -0.3394775390625, 0.001903533935546875, -0.056537628173828125, -0.4267730712890625, -0.15340232849121094, 0.3958759307861328, 0.381744384765625, -0.419097900390625, -0.26918792724609375, 0.17431640625, -0.25457000732421875, 0.2967243194580078, 0.132476806640625, 0.1977519989013672, 0.4660797119140625, -0.19588851928710938, -0.20223236083984375, 0.12571144104003906, 0.28264617919921875, -0.919586181640625, -0.08292007446289062, -0.2020111083984375, -0.10102081298828125, -0.26336669921875, 0.030416488647460938, 0.39019012451171875, 0.10535430908203125, 0.3272552490234375, 0.2432708740234375, -0.10577964782714844, 0.475311279296875, -0.06653594970703125, 0.31412506103515625, 0.0941619873046875, 0.42288970947265625, 0.016330718994140625, 0.6128311157226562, -0.3283042907714844, -0.4070587158203125, 0.433013916015625, 1.22125244140625, -0.3986701965332031, -0.3443450927734375, 0.44275665283203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000019.npy"}
|
|
{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.0407865047454834, "std": 0.3044738173484802, "min": -0.8436050415039062, "p10": -0.34591827392578123, "median": 0.04765033721923828, "p90": 0.44367828369140627, "max": 0.69415283203125, "pos_frac": 0.5625, "sample": [0.0401611328125, -0.1297321319580078, -0.013437271118164062, 0.185089111328125, -0.246978759765625, -0.012990951538085938, -0.02562713623046875, 0.4481048583984375, -0.42668914794921875, 0.48529052734375, -0.20827865600585938, 0.17706298828125, 0.5724639892578125, 0.2337207794189453, -0.347381591796875, -0.15204238891601562, -0.4179573059082031, 0.34481048583984375, -0.34444427490234375, 0.433349609375, 0.25765228271484375, -0.244598388671875, -0.08906173706054688, 0.005645751953125, -0.03691864013671875, -0.12346267700195312, 0.04225921630859375, 0.2571830749511719, -0.13786697387695312, -0.16392135620117188, 0.06283950805664062, -0.0994873046875, 0.10725021362304688, 0.11693572998046875, -0.23664283752441406, -0.09218597412109375, 0.576507568359375, -0.8003082275390625, -0.13131332397460938, -0.34654998779296875, 0.69415283203125, -0.153076171875, 0.20285797119140625, 0.22525787353515625, 0.1842803955078125, 0.10416603088378906, 0.3072357177734375, 0.15179443359375, 0.33367156982421875, -0.11705780029296875, -0.8436050415039062, 0.3153839111328125, 0.0039730072021484375, 0.14592742919921875, 0.35906219482421875, -0.4223747253417969, 0.076934814453125, 0.05304145812988281, 0.17299461364746094, -0.03491783142089844, 0.49225616455078125, 0.25035858154296875, 0.5001907348632812, 0.08937835693359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000020.npy"}
|
|
{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.11629366874694824, "std": 0.34868374466896057, "min": -0.542572021484375, "p10": -0.3007652282714844, "median": 0.08113384246826172, "p90": 0.584410858154297, "max": 1.082366943359375, "pos_frac": 0.625, "sample": [-0.11867523193359375, 0.10291862487792969, 0.9004364013671875, 0.7556304931640625, -0.541839599609375, 0.04932403564453125, -0.13915061950683594, -0.014606475830078125, 0.408660888671875, 0.1559600830078125, 0.16485595703125, -0.2048492431640625, 0.219207763671875, 0.21224594116210938, 0.24506378173828125, 0.3925628662109375, -0.17135047912597656, 0.2142467498779297, -0.3315620422363281, 0.14999771118164062, -0.33929443359375, -0.3023834228515625, -0.11930084228515625, 0.12373733520507812, -0.1924266815185547, 1.082366943359375, 0.601043701171875, -0.03367424011230469, 0.3176727294921875, 0.16845703125, -0.542572021484375, 0.3697357177734375, -0.05687713623046875, 0.0054340362548828125, -0.00653076171875, 0.0053005218505859375, 0.008026123046875, 0.0272369384765625, 0.4083099365234375, -0.3558807373046875, 0.05934906005859375, 0.2492198944091797, 0.16623687744140625, 0.030611038208007812, -0.044490814208984375, -0.2507190704345703, 0.12377166748046875, 0.0441436767578125, 0.10421180725097656, -0.47550201416015625, 0.11954307556152344, 0.665618896484375, 0.5430793762207031, 0.3914337158203125, 0.35489654541015625, -0.09962844848632812, 0.879913330078125, -0.146392822265625, 0.8197174072265625, 0.3402690887451172, -0.0603790283203125, -0.29698944091796875, 0.5456008911132812, -0.23817825317382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000021.npy"}
|
|
{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.2547217905521393, "std": 0.44475993514060974, "min": -0.8624706268310547, "p10": -0.2400030136108398, "median": 0.15310287475585938, "p90": 0.9623382568359375, "max": 1.4829788208007812, "pos_frac": 0.734375, "sample": [0.035923004150390625, 0.980712890625, -0.06930160522460938, -0.06788253784179688, 0.08294677734375, -0.0184326171875, 0.290802001953125, 0.4260749816894531, 0.30849647521972656, 0.276275634765625, 0.7709884643554688, 0.32049560546875, 0.0319366455078125, 1.05975341796875, -0.8624706268310547, -0.457183837890625, 0.4235076904296875, 0.04728126525878906, -0.21080780029296875, 0.260467529296875, -0.2704124450683594, 0.78326416015625, -0.33222198486328125, 0.0670928955078125, -0.08090972900390625, 0.19070816040039062, 1.1052474975585938, 0.3139457702636719, 0.37674713134765625, 1.1242523193359375, 0.1585235595703125, -0.09964752197265625, 0.7913665771484375, 0.17182159423828125, 0.6412849426269531, 1.4829788208007812, 0.2382354736328125, -0.10492706298828125, 0.08147239685058594, 0.10477447509765625, 0.067138671875, 0.5737838745117188, 0.11968421936035156, 0.14768218994140625, -0.021257400512695312, 0.9729766845703125, -0.21118927001953125, -0.12823486328125, 0.9375152587890625, 0.594146728515625, 0.11484527587890625, 0.22129058837890625, -0.2523517608642578, 0.055538177490234375, -0.380584716796875, 0.094970703125, 0.30654144287109375, 0.4021263122558594, 0.06716156005859375, 1.110321044921875, -0.269775390625, 0.7171707153320312, 0.6270751953125, 0.06243896484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000022.npy"}
|
|
{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.22987452149391174, "std": 0.457994282245636, "min": -0.82183837890625, "p10": -0.2545526504516601, "median": 0.22069740295410156, "p90": 0.7746675491333009, "max": 1.8461761474609375, "pos_frac": 0.6875, "sample": [-0.199676513671875, 0.25476837158203125, 0.7856292724609375, 0.22777938842773438, 0.3228607177734375, 0.3737678527832031, 0.17775726318359375, 0.085906982421875, -0.18781471252441406, -0.4883880615234375, 1.32177734375, 1.5785980224609375, 0.6011962890625, -0.14633941650390625, 0.195220947265625, -0.025234222412109375, 0.4529876708984375, 0.28570556640625, -0.11773681640625, 0.2135601043701172, 0.28838348388671875, 0.2975311279296875, 0.947357177734375, -0.08436965942382812, -0.09763526916503906, -0.2900543212890625, 0.05196380615234375, -0.41394805908203125, 0.2342529296875, -0.1241607666015625, 0.3320655822753906, 0.4626617431640625, -0.10999298095703125, 0.6215114593505859, 0.1658306121826172, 0.0041351318359375, -0.1615753173828125, 0.13061904907226562, 0.2236042022705078, 0.35614585876464844, 0.3809471130371094, 0.1780548095703125, 0.22101211547851562, 0.8761825561523438, -0.2729759216308594, 1.8461761474609375, 0.0565338134765625, -0.82183837890625, 0.4021797180175781, -0.41632080078125, 0.26673316955566406, 0.7490901947021484, 0.10269927978515625, -0.3599739074707031, -0.06950759887695312, 0.5271987915039062, 0.2203826904296875, 0.7968292236328125, -0.2115650177001953, 0.3936920166015625, 0.39246368408203125, -0.0866546630859375, 0.48145294189453125, 0.5125255584716797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000023.npy"}
|
|
{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.24034002423286438, "std": 0.44114452600479126, "min": -1.005126953125, "p10": -0.14784164428710936, "median": 0.2076549530029297, "p90": 0.75387077331543, "max": 1.446624755859375, "pos_frac": 0.75, "sample": [0.48496246337890625, 0.6820945739746094, 0.08336257934570312, 0.24327850341796875, 0.6244926452636719, 0.03178596496582031, 0.5233688354492188, -0.12078857421875, 0.79150390625, 0.250640869140625, 0.7924709320068359, -0.18362808227539062, 0.11735343933105469, 0.20606613159179688, 0.3367652893066406, 0.13002395629882812, -0.0743560791015625, 0.6824588775634766, 0.5692100524902344, 0.18076324462890625, -0.051326751708984375, -0.8686141967773438, -0.07432937622070312, 0.1497802734375, 0.77716064453125, 0.6995277404785156, -0.039325714111328125, -0.823333740234375, 0.3966522216796875, 0.17090606689453125, 1.237457275390625, 0.56158447265625, 0.4539337158203125, -0.5201034545898438, 0.3914527893066406, -0.13226318359375, 0.2092437744140625, 0.0998992919921875, -0.15451812744140625, 0.38855743408203125, 0.859893798828125, 0.31603240966796875, -0.0674285888671875, 0.34717559814453125, 0.0768280029296875, 1.446624755859375, 0.85498046875, 0.0300445556640625, 0.69683837890625, 0.13472938537597656, 0.18185806274414062, 0.4364299774169922, 0.002155303955078125, 0.06342887878417969, 0.3604583740234375, -0.0074462890625, 0.4690971374511719, -0.002044677734375, 0.4126167297363281, -0.5593109130859375, 0.33188629150390625, -1.005126953125, 0.1757049560546875, 0.6021652221679688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000024.npy"}
|
|
{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.22478067874908447, "std": 0.4554341733455658, "min": -0.8603401184082031, "p10": -0.2547479629516601, "median": 0.24921417236328125, "p90": 0.7023128509521486, "max": 1.6734085083007812, "pos_frac": 0.671875, "sample": [0.3525886535644531, 0.3723258972167969, 0.535980224609375, 0.5105667114257812, 0.08325958251953125, -0.18616294860839844, -0.1772308349609375, 0.9092178344726562, 0.553253173828125, 0.10045242309570312, -0.09876251220703125, 0.48663330078125, -0.09818077087402344, 0.32140350341796875, 1.6734085083007812, -0.8603401184082031, -0.46956443786621094, 0.513397216796875, -0.13840103149414062, 0.7165412902832031, -0.5850067138671875, 0.19156646728515625, 0.3676795959472656, -0.09759521484375, -0.08834075927734375, 0.5571823120117188, -0.05887603759765625, 0.5774307250976562, -0.28414154052734375, -0.07079124450683594, -0.13463592529296875, 0.00801849365234375, -0.16278839111328125, 0.38922119140625, 0.04811859130859375, 0.5956573486328125, -0.015108108520507812, 1.2146148681640625, 0.34979248046875, 0.11365509033203125, 0.012996673583984375, 0.5067291259765625, 0.9080734252929688, 0.4977836608886719, 0.2793083190917969, 0.5333480834960938, 0.2547607421875, 0.209503173828125, 0.6691131591796875, 0.07845687866210938, 0.541351318359375, 0.1640777587890625, -0.36009979248046875, -0.617340087890625, 0.7575149536132812, -0.030626296997070312, -0.8153457641601562, 0.9814643859863281, 0.35024261474609375, 0.5904960632324219, 0.4701080322265625, 0.2436676025390625, -0.13561248779296875, 0.2799530029296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000025.npy"}
|
|
{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.41762077808380127, "std": 0.5279146432876587, "min": -0.48786163330078125, "p10": -0.17408313751220703, "median": 0.31130504608154297, "p90": 1.093994140625, "max": 1.984405517578125, "pos_frac": 0.765625, "sample": [0.5872802734375, 0.19060516357421875, 0.38202667236328125, 0.12454986572265625, 1.228515625, 0.586639404296875, -0.251190185546875, 0.7493247985839844, -0.48786163330078125, -0.09865188598632812, 1.984405517578125, 0.7229232788085938, 0.984283447265625, 0.13309478759765625, -0.18688201904296875, 0.79241943359375, 0.6906814575195312, 0.531524658203125, 0.877899169921875, -0.18140792846679688, 0.3133678436279297, -0.026338577270507812, 0.25078582763671875, 0.17431259155273438, 0.5818405151367188, 1.0086135864257812, 0.1636505126953125, 0.277435302734375, 0.250091552734375, 1.0969467163085938, 0.05860137939453125, -0.46954345703125, 0.6144237518310547, 0.1744518280029297, -0.09722518920898438, 0.5162200927734375, 1.399261474609375, -0.14942169189453125, -0.05709075927734375, 1.5671768188476562, 0.3897705078125, -0.1045684814453125, 0.7231597900390625, 0.5313491821289062, 0.3845672607421875, 1.21246337890625, -0.4621467590332031, 0.5601959228515625, 0.12050247192382812, -0.15699195861816406, -0.04840087890625, 0.30924224853515625, -0.3317852020263672, 0.6573486328125, 0.0018768310546875, 0.8720703125, 1.0871047973632812, 1.0695266723632812, 1.4944381713867188, 0.3006134033203125, 0.0748443603515625, 0.039051055908203125, 0.7057533264160156, 0.2900047302246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000026.npy"}
|
|
{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.5043210983276367, "std": 0.5854207277297974, "min": -0.5514678955078125, "p10": -0.1569681167602539, "median": 0.38453102111816406, "p90": 1.3579463958740234, "max": 2.6791152954101562, "pos_frac": 0.84375, "sample": [0.15465927124023438, 0.7363395690917969, 0.6839447021484375, 0.20296096801757812, 0.6053314208984375, 1.7024154663085938, 0.3682861328125, 0.31744384765625, 1.3669357299804688, -0.23567771911621094, 1.050933837890625, 0.2599639892578125, 1.0306167602539062, 0.15649795532226562, 0.8561553955078125, 0.7423858642578125, 2.6791152954101562, -0.1093902587890625, 0.2650299072265625, 0.26490020751953125, 0.05731201171875, -0.5514678955078125, 0.8966064453125, 0.11520004272460938, 0.09000015258789062, 0.3762187957763672, 0.0004100799560546875, 0.030466079711914062, 0.2822608947753906, 1.2608108520507812, 0.49141693115234375, -0.17696189880371094, -0.3231048583984375, 0.84368896484375, 0.49556732177734375, 0.30799102783203125, 0.3853263854980469, 0.5210342407226562, 0.419342041015625, 0.16340255737304688, -0.15938568115234375, 0.5931167602539062, -0.1771087646484375, 0.47965240478515625, 0.68035888671875, 1.4501800537109375, -0.35350799560546875, 0.38373565673828125, 1.3369712829589844, 0.9259490966796875, 1.420806884765625, 0.04006195068359375, 0.35587310791015625, 0.6585693359375, 1.487640380859375, 0.516021728515625, 1.8133544921875, -0.15132713317871094, 0.5938148498535156, -0.0525970458984375, 0.271148681640625, 0.7860260009765625, 0.04451751708984375, 0.548309326171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000027.npy"}
|
|
{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.3272559344768524, "std": 0.6033403873443604, "min": -1.71392822265625, "p10": -0.24843025207519526, "median": 0.3050556182861328, "p90": 0.9826868057250978, "max": 1.819366455078125, "pos_frac": 0.765625, "sample": [0.9220809936523438, 0.3735198974609375, 0.1002349853515625, 0.17819786071777344, 0.9934597015380859, 0.19061279296875, -0.10130691528320312, 0.0528106689453125, -0.19431686401367188, 0.018625259399414062, -0.08666229248046875, 0.3130340576171875, 1.7617645263671875, 0.5123443603515625, 0.8228759765625, -0.00457000732421875, 0.057952880859375, 0.616729736328125, 0.1482391357421875, 0.6735458374023438, 0.24190521240234375, -0.3449249267578125, -0.2716217041015625, 0.957550048828125, -0.623687744140625, 0.2631683349609375, 0.2796630859375, 0.756134033203125, 1.2381973266601562, 0.6581306457519531, 0.6060028076171875, 0.343780517578125, 0.23841476440429688, -0.009185791015625, 0.6288719177246094, 0.5606441497802734, 0.7598686218261719, 0.30052947998046875, 0.4422798156738281, -0.1202545166015625, 0.40807342529296875, 0.28203582763671875, -0.024707794189453125, 1.092987060546875, 1.819366455078125, 0.2257080078125, 0.431427001953125, 0.06739997863769531, 1.4967155456542969, 0.5025100708007812, 0.7260684967041016, -1.4587783813476562, 0.0257568359375, 0.4326019287109375, -0.4673728942871094, 0.3095817565917969, 0.44582366943359375, 1.4161529541015625, 0.6877288818359375, -0.09466743469238281, 0.23150634765625, -1.71392822265625, -0.48394775390625, 0.3316993713378906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000028.npy"}
|
|
{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5928229689598083, "std": 0.6201984286308289, "min": -0.9011459350585938, "p10": -0.10267028808593745, "median": 0.6038303375244141, "p90": 1.2430007934570313, "max": 2.7155914306640625, "pos_frac": 0.84375, "sample": [0.8436775207519531, -0.12082862854003906, 0.1042022705078125, 0.2373809814453125, 0.44598388671875, 0.13167953491210938, -0.0243072509765625, 1.0708541870117188, 0.7534217834472656, 0.25681304931640625, 0.205474853515625, -0.13608741760253906, 1.0427932739257812, 1.1438369750976562, 0.30185699462890625, 1.0629119873046875, 1.1212310791015625, -0.052448272705078125, -0.06030082702636719, 1.34393310546875, 0.21776580810546875, 0.8535995483398438, 0.7805709838867188, 0.23193359375, 1.2335052490234375, 0.6140213012695312, 0.09050750732421875, 0.20689773559570312, 1.102783203125, 0.371856689453125, -0.17493629455566406, 1.6555633544921875, 2.7155914306640625, -0.5545654296875, 0.9718475341796875, 0.21099090576171875, 0.18941497802734375, 0.08547782897949219, 0.8449783325195312, 1.2470703125, 1.207061767578125, 0.72894287109375, 0.6401290893554688, 0.6151351928710938, 0.3916587829589844, -0.14516067504882812, 0.902801513671875, -0.33188629150390625, 0.9161148071289062, 0.194976806640625, 0.10211944580078125, 0.1439800262451172, 1.191680908203125, 0.31508636474609375, 1.1469078063964844, 1.819091796875, -0.9011459350585938, 1.093109130859375, 0.9933815002441406, 0.5936393737792969, 1.38629150390625, 0.9810562133789062, 0.00502777099609375, 1.3837165832519531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000029.npy"}
|
|
{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.6878979206085205, "std": 0.6341769695281982, "min": -0.8761062622070312, "p10": -0.10558776855468748, "median": 0.60247802734375, "p90": 1.4384971618652345, "max": 2.423919677734375, "pos_frac": 0.84375, "sample": [1.1373367309570312, 0.01265716552734375, -0.04175758361816406, 0.42163848876953125, 1.4443359375, -0.8761062622070312, 0.41776275634765625, 0.2021026611328125, -0.18981552124023438, 0.78302001953125, 1.237548828125, 0.889129638671875, 0.5395851135253906, 1.0718650817871094, 0.416900634765625, 0.9244308471679688, 0.47220611572265625, 1.347259521484375, 0.3041229248046875, 1.1909637451171875, 0.8945846557617188, 0.639892578125, 2.423919677734375, 0.8398857116699219, 1.1721267700195312, 0.27065467834472656, 0.92596435546875, 1.1427688598632812, -0.11492156982421875, 0.4433116912841797, -0.13700103759765625, 0.15208816528320312, 0.85565185546875, -0.16875267028808594, 0.5203266143798828, 0.6488571166992188, 1.8267669677734375, 2.3553009033203125, 1.031982421875, 0.7421951293945312, -0.010843276977539062, 1.1269683837890625, 0.45809173583984375, 0.5650634765625, -0.176422119140625, 1.9651641845703125, 1.0155181884765625, 0.3722858428955078, 0.9698143005371094, 1.0364456176757812, 1.56890869140625, 1.4248733520507812, 0.3137359619140625, -0.27877044677734375, -0.08380889892578125, 0.9131011962890625, 0.4574165344238281, 0.44028472900390625, 0.7144241333007812, 0.033123016357421875, 0.4851531982421875, 1.7348175048828125, 0.3161773681640625, 0.49315643310546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000030.npy"}
|
|
{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.4500678479671478, "std": 0.6682660579681396, "min": -0.5946807861328125, "p10": -0.3277252197265625, "median": 0.3796844482421875, "p90": 1.2425949096679694, "max": 2.9293136596679688, "pos_frac": 0.703125, "sample": [0.17922019958496094, 0.9642486572265625, -0.373016357421875, 0.5541229248046875, 1.933074951171875, 0.4388580322265625, -0.3123130798339844, -0.5761642456054688, 0.8582611083984375, -0.5422821044921875, 2.9293136596679688, -0.07177734375, -0.047576904296875, 0.3858489990234375, 0.18045425415039062, 0.30556297302246094, 1.0612258911132812, 0.28223228454589844, 2.1464157104492188, 0.7805519104003906, -0.0917510986328125, -0.13671493530273438, 0.17038726806640625, 0.1144256591796875, 0.4411735534667969, 0.2729167938232422, 0.8315773010253906, 0.2875175476074219, 0.5094432830810547, -0.16466140747070312, 0.852020263671875, 0.8104248046875, 1.6673126220703125, -0.0097808837890625, 0.45926475524902344, -0.344482421875, 0.23529815673828125, 0.9440383911132812, -0.330352783203125, 0.5039215087890625, 1.070831298828125, 0.8904647827148438, -0.23785781860351562, 0.3651103973388672, -0.3251190185546875, -0.5946807861328125, 0.6250076293945312, 0.627899169921875, -0.17551422119140625, -0.03717041015625, 0.2910804748535156, 1.3162078857421875, 1.3505401611328125, -0.3288421630859375, -0.144683837890625, 0.6486587524414062, 0.25946044921875, 0.3735198974609375, 0.7632026672363281, 0.43221282958984375, 0.795440673828125, 1.4430923461914062, 0.9047775268554688, 0.39246368408203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000031.npy"}
|
|
{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.7446720600128174, "std": 0.9461292624473572, "min": -0.6966209411621094, "p10": -0.24613609313964835, "median": 0.5001411437988281, "p90": 2.269461059570313, "max": 4.2985382080078125, "pos_frac": 0.828125, "sample": [0.17803955078125, 0.3624305725097656, 0.627349853515625, 1.1696319580078125, 0.8597030639648438, 0.7046051025390625, 0.1541900634765625, 0.502685546875, 0.3002796173095703, 0.4177703857421875, 1.1039962768554688, 0.6780853271484375, -0.4565391540527344, -0.06310462951660156, 0.08642578125, 1.1566390991210938, -0.3619270324707031, 0.9800872802734375, -0.12134933471679688, 2.370086669921875, 1.4594573974609375, 1.4775009155273438, 2.5577545166015625, -0.550750732421875, 0.8525047302246094, 0.5722198486328125, 0.298553466796875, 0.101898193359375, -0.6966209411621094, 0.9044952392578125, 0.33568572998046875, 0.1282672882080078, 0.3458595275878906, 3.1822052001953125, 1.2992172241210938, 0.28808021545410156, 1.240570068359375, 0.7751197814941406, -0.14366912841796875, 0.34966278076171875, 2.1685028076171875, 1.2198200225830078, -0.2900505065917969, 0.2886962890625, 4.2985382080078125, 0.9290542602539062, 0.396575927734375, 2.5006942749023438, 2.3127288818359375, -0.48062705993652344, 0.1827564239501953, 0.49759674072265625, 2.5672607421875, 1.0455398559570312, -0.6149559020996094, -0.05403900146484375, 0.8461494445800781, 0.991424560546875, 0.2878589630126953, 0.436187744140625, 0.10408401489257812, 0.6162948608398438, 1.5881805419921875, 0.3936424255371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000032.npy"}
|
|
{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.6417955160140991, "std": 0.667909562587738, "min": -0.7734222412109375, "p10": -0.20271682739257804, "median": 0.6042671203613281, "p90": 1.5934906005859377, "max": 2.473480224609375, "pos_frac": 0.84375, "sample": [-0.264923095703125, 0.4782447814941406, 0.4182243347167969, 0.4909820556640625, 2.473480224609375, -0.07321548461914062, 0.4676971435546875, 1.625701904296875, 0.5564823150634766, 0.5656375885009766, 0.6801681518554688, 0.8240585327148438, 1.612030029296875, 0.8463058471679688, 1.0795745849609375, 0.23966598510742188, 0.7810821533203125, -0.7734222412109375, 0.6719970703125, -0.76220703125, -0.11254119873046875, 1.55023193359375, 1.206939697265625, 2.3340072631835938, 0.6942672729492188, 0.09200286865234375, 1.68798828125, 0.0196380615234375, 0.23436737060546875, 1.821044921875, 0.1550445556640625, 0.214019775390625, 2.2111968994140625, 0.1400909423828125, -0.264892578125, 0.6038894653320312, 0.18965911865234375, 0.5582199096679688, 0.9784126281738281, 0.6714630126953125, 0.6224594116210938, 1.0051116943359375, -0.241363525390625, 0.45526123046875, 0.2662544250488281, 1.0802764892578125, 0.6370620727539062, 1.04583740234375, 0.8228034973144531, -0.399261474609375, 0.26422882080078125, -0.2520332336425781, 0.8558692932128906, 0.5520286560058594, 0.604644775390625, 1.1482467651367188, 1.1772193908691406, -0.0239715576171875, 0.906524658203125, 0.2822456359863281, 0.8658218383789062, 0.46395111083984375, 0.7936496734619141, 1.2194328308105469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000033.npy"}
|
|
{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.8307995796203613, "std": 0.8640182614326477, "min": -0.837158203125, "p10": -0.0704975128173828, "median": 0.5611610412597656, "p90": 1.9912551879882814, "max": 3.329620361328125, "pos_frac": 0.859375, "sample": [2.3612823486328125, 0.11446762084960938, 0.628265380859375, -0.051364898681640625, 2.6786346435546875, 0.4264392852783203, 1.95294189453125, 1.376068115234375, 0.3538970947265625, 0.4149169921875, 0.1855621337890625, -0.0976715087890625, 0.500701904296875, -0.07869720458984375, 2.0076751708984375, 2.237945556640625, 0.09705352783203125, -0.16437911987304688, 1.60369873046875, 0.38336944580078125, 1.2949371337890625, 0.08861923217773438, 1.152547836303711, 1.5628738403320312, 1.69342041015625, 1.8554306030273438, 1.6108245849609375, -0.30040740966796875, 0.1324920654296875, 1.2390060424804688, -0.25272369384765625, 0.7903976440429688, 0.726287841796875, 0.9918785095214844, 2.515899658203125, 0.8905067443847656, 1.2867469787597656, 3.329620361328125, 0.6281719207763672, -0.04912567138671875, 0.3182411193847656, 0.3659706115722656, 1.1075248718261719, 1.7475128173828125, 0.5336151123046875, 0.8096427917480469, 0.1361827850341797, 0.19451522827148438, 2.193817138671875, 0.21805191040039062, 0.14590072631835938, -0.837158203125, 0.43906402587890625, 0.8818206787109375, 0.2673187255859375, -0.29332542419433594, 1.789794921875, 0.3659324645996094, 0.3306121826171875, 0.5887069702148438, 1.636505126953125, 1.81689453125, 0.09874153137207031, 0.19707870483398438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000034.npy"}
|
|
{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.2235569953918457, "std": 1.1256182193756104, "min": -0.5068817138671875, "p10": 0.12170696258544927, "median": 1.0032234191894531, "p90": 2.3609146118164066, "max": 7.0582275390625, "pos_frac": 0.921875, "sample": [1.1066818237304688, 1.1678276062011719, 2.203216552734375, 1.3929824829101562, -0.129730224609375, 0.68206787109375, 1.391510009765625, -0.05718231201171875, 0.09432601928710938, 1.235107421875, 1.56158447265625, 0.26213645935058594, 1.3677825927734375, 0.87774658203125, 0.42236328125, 0.478973388671875, 1.7668495178222656, 3.1750869750976562, 0.9655303955078125, 0.3497161865234375, 0.9732818603515625, 2.1713104248046875, 2.980682373046875, 0.1839447021484375, 0.46503448486328125, 0.5214385986328125, 0.49405670166015625, 1.042205810546875, 0.7721710205078125, 0.927642822265625, 7.0582275390625, 1.4315376281738281, 1.141845703125, 1.3273468017578125, 0.32256317138671875, 3.4044952392578125, 0.2558174133300781, 1.670379638671875, 2.6223907470703125, 2.0303573608398438, 0.6459274291992188, 2.2983551025390625, 0.9623641967773438, 1.715179443359375, 0.7875862121582031, -0.1348724365234375, 1.87347412109375, 2.387725830078125, 0.8064956665039062, 1.0331649780273438, 0.9004096984863281, -0.0718994140625, 0.5075359344482422, 2.5883026123046875, 1.0710067749023438, 0.9010677337646484, 1.5941925048828125, 0.09503364562988281, 1.6204910278320312, -0.5068817138671875, 0.8351898193359375, 2.0814208984375, 2.0108795166015625, 0.19419479370117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000035.npy"}
|
|
{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.1225972175598145, "std": 1.2831287384033203, "min": -2.5635452270507812, "p10": -0.19100265502929686, "median": 0.9129085540771484, "p90": 3.0314445495605487, "max": 4.4469451904296875, "pos_frac": 0.796875, "sample": [1.2569961547851562, -0.11786460876464844, 0.8154773712158203, 1.5208740234375, 1.0682754516601562, 0.7111568450927734, 0.76800537109375, -0.42413330078125, -0.2462291717529297, 2.4127044677734375, 1.6421432495117188, 0.012481689453125, -0.16983795166015625, 0.46602439880371094, 0.04006767272949219, 1.7536392211914062, 0.18059539794921875, 1.7588043212890625, 1.646575927734375, 0.36441802978515625, -0.2000732421875, -0.11122322082519531, -0.1374969482421875, 0.14800453186035156, 3.649566650390625, 4.168247222900391, 1.5711746215820312, -0.6579437255859375, 3.3520050048828125, -0.02704620361328125, 1.006357192993164, 1.368438720703125, -0.38474273681640625, 2.5649185180664062, -0.0526885986328125, 0.2291889190673828, 3.23138427734375, 0.6472320556640625, 2.2573165893554688, 0.9276008605957031, 2.0814590454101562, 0.9408454895019531, 2.1279373168945312, 0.7689628601074219, 3.3235626220703125, 1.1227493286132812, 0.7644920349121094, 4.4469451904296875, 0.8420314788818359, -2.5635452270507812, 2.471668243408203, 1.2890090942382812, 1.8012161254882812, 0.7692070007324219, 2.3022842407226562, 1.1470508575439453, -0.29058837890625, 0.3914966583251953, 1.930755615234375, 0.6044349670410156, 1.6589279174804688, 3.6361541748046875, 0.8982162475585938, 0.3705482482910156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000036.npy"}
|
|
{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 1.0293034315109253, "std": 1.4328175783157349, "min": -2.095733642578125, "p10": -0.28441429138183594, "median": 0.6779975891113281, "p90": 2.705603027343751, "max": 7.14788818359375, "pos_frac": 0.796875, "sample": [1.4312515258789062, -0.009307861328125, 1.1065750122070312, 0.8648452758789062, 0.42024993896484375, -0.38726806640625, -0.28226470947265625, 0.50823974609375, 1.1799087524414062, -0.3335762023925781, 0.4241180419921875, 2.98077392578125, 0.3512535095214844, -0.8544921875, -0.1567535400390625, 0.0325927734375, 0.6114902496337891, 0.20654678344726562, 2.031169891357422, 7.14788818359375, 1.2593917846679688, 4.5882110595703125, 1.1909599304199219, 0.9761543273925781, 0.9228363037109375, 1.181365966796875, 0.5762481689453125, -0.043304443359375, 1.5014572143554688, 1.410064697265625, 0.6269760131835938, 0.20096588134765625, 0.3786125183105469, -0.8174362182617188, 0.8335800170898438, 0.21927642822265625, 0.29308319091796875, 2.503997802734375, 2.2884292602539062, 3.259063720703125, -0.2853355407714844, 2.7747344970703125, 0.7290191650390625, 1.9915390014648438, -0.1478424072265625, 0.7538108825683594, 0.10913658142089844, -0.1511993408203125, 0.7641754150390625, 1.5205307006835938, 2.2919387817382812, 0.1253509521484375, 2.5442962646484375, 2.318023681640625, 0.33307456970214844, 3.0209579467773438, 2.1486854553222656, -2.095733642578125, 0.3274993896484375, -0.4753265380859375, 2.4419212341308594, 3.7165069580078125, 0.2333831787109375, 0.2630958557128906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000037.npy"}
|
|
{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.3506267070770264, "std": 1.5932660102844238, "min": -1.151092529296875, "p10": -0.06021270751953124, "median": 0.9063968658447266, "p90": 3.2411178588867195, "max": 7.55230712890625, "pos_frac": 0.859375, "sample": [0.139739990234375, 7.55230712890625, 3.9508056640625, 1.9984397888183594, 0.4754791259765625, 0.1420421600341797, 1.8151321411132812, -0.0876922607421875, 0.8700294494628906, 1.313507080078125, 1.691192626953125, 0.9409065246582031, 0.3334808349609375, 2.4189376831054688, 1.2813796997070312, -0.1633148193359375, 0.768768310546875, 0.7583541870117188, -0.04067420959472656, 2.7608871459960938, 0.9843978881835938, 1.2880401611328125, 2.9663429260253906, 0.12406730651855469, 3.4381103515625, 0.18255233764648438, 1.9091796875, 2.3646469116210938, 1.5941276550292969, 0.5821075439453125, 0.6740283966064453, 3.0061111450195312, -1.151092529296875, 0.9569320678710938, 0.4416522979736328, 0.444427490234375, 0.87188720703125, 6.3655242919921875, 0.49615478515625, 0.2643585205078125, 3.3418350219726562, 4.874664306640625, 1.7588653564453125, 0.6369400024414062, 0.7201042175292969, -0.08660125732421875, -0.064483642578125, 0.10150146484375, 0.8176116943359375, 1.3224639892578125, -0.5103225708007812, 0.13885498046875, -0.0502471923828125, 1.3688163757324219, 1.0336017608642578, 1.811004638671875, 1.5207366943359375, 5.152427673339844, 1.8731231689453125, 0.33002471923828125, 1.5546646118164062, -0.5812110900878906, 0.7878875732421875, 1.8645782470703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000038.npy"}
|
|
{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.6912683248519897, "std": 1.4913898706436157, "min": -2.484710693359375, "p10": 0.19280166625976564, "median": 1.3570976257324219, "p90": 3.8250688552856453, "max": 5.27679443359375, "pos_frac": 0.90625, "sample": [2.2682876586914062, 2.2466583251953125, 4.9700775146484375, 1.69842529296875, 1.6622314453125, -0.5492324829101562, 2.9535369873046875, 2.629364013671875, 1.1886005401611328, 2.2709484100341797, 0.8509063720703125, 3.2951583862304688, 1.677947998046875, 1.0758304595947266, 4.084617614746094, 0.8679122924804688, 0.6129913330078125, 0.7067947387695312, -0.41583251953125, 1.1036148071289062, 0.8682403564453125, 2.7929611206054688, 2.680908203125, 0.40004539489746094, 1.249908447265625, 3.0543136596679688, -0.220184326171875, 1.7199211120605469, 4.707000732421875, 0.32329559326171875, 0.18325042724609375, 0.5079097747802734, 2.8665695190429688, 2.509124755859375, -2.484710693359375, 2.1792373657226562, 0.33837318420410156, 1.9540328979492188, 3.6431140899658203, 5.27679443359375, 1.0738983154296875, 2.5807342529296875, 4.399559020996094, 1.19000244140625, 1.27642822265625, 0.5380134582519531, -0.4913177490234375, 0.5174407958984375, 1.4377670288085938, 0.7341709136962891, 1.08746337890625, 3.9030494689941406, 2.8978424072265625, 2.9286041259765625, 0.5480899810791016, 4.007354736328125, 3.31573486328125, 1.0627632141113281, 1.5650863647460938, 0.215087890625, 0.641082763671875, -0.014036178588867188, 0.7852954864501953, 2.2921104431152344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000039.npy"}
|
|
{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.5692869424819946, "std": 1.7425506114959717, "min": -1.0187225341796875, "p10": -0.09466171264648438, "median": 1.1390457153320312, "p90": 3.915401077270508, "max": 7.773681640625, "pos_frac": 0.84375, "sample": [-0.46543121337890625, 1.0447616577148438, 0.6491241455078125, 1.3052635192871094, 1.1007728576660156, 0.5481796264648438, 0.575592041015625, 0.2881317138671875, 0.2953052520751953, 2.243213653564453, 7.064056396484375, 3.087127685546875, 1.1957836151123047, 3.3749160766601562, 1.4351119995117188, -0.4079399108886719, 1.9679718017578125, 2.2079925537109375, 0.007993698120117188, -1.0187225341796875, 2.62908935546875, -0.09271240234375, 0.717315673828125, -0.05035400390625, 2.1411819458007812, 1.6100921630859375, 0.2567253112792969, 0.8807964324951172, 0.5276412963867188, 0.2034759521484375, 4.525459289550781, -0.2376995086669922, 3.8830184936523438, 0.9102020263671875, 0.4029560089111328, 4.623443603515625, -0.000728607177734375, 3.929279327392578, 0.6964912414550781, 1.1773185729980469, 5.0361480712890625, 0.9401817321777344, 0.9704818725585938, 7.773681640625, -0.47878265380859375, 1.5689773559570312, -0.09549713134765625, 1.9097137451171875, 2.5505828857421875, 2.2818222045898438, 2.07928466796875, 2.575714111328125, 3.7797164916992188, 1.7250308990478516, 0.9728775024414062, 1.2478752136230469, 4.2503814697265625, -0.353271484375, 1.6648178100585938, 0.04396820068359375, 0.14272308349609375, 1.6831741333007812, 2.3652572631835938, 0.5673103332519531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000040.npy"}
|
|
{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.867814540863037, "std": 2.1468727588653564, "min": -2.99066162109375, "p10": 0.0148834228515628, "median": 1.2330875396728516, "p90": 4.230191040039062, "max": 9.173782348632812, "pos_frac": 0.890625, "sample": [1.0165901184082031, 1.2293891906738281, 3.4459075927734375, 1.6227874755859375, 0.6588516235351562, 2.1923599243164062, 3.467742919921875, 2.451946258544922, -0.7078704833984375, 3.1918106079101562, 6.77593994140625, 0.7316360473632812, 2.163330078125, 9.173782348632812, 0.3140850067138672, 0.53082275390625, 0.9668731689453125, 0.9459152221679688, 0.35729217529296875, -2.0039825439453125, 1.551605224609375, 3.6680030822753906, 4.155364990234375, 0.9745750427246094, 7.27728271484375, 1.1190223693847656, 4.2332763671875, 5.0833740234375, 0.5574417114257812, 0.3927574157714844, 2.0679779052734375, 0.423065185546875, 7.24444580078125, 0.8419647216796875, 0.8876724243164062, 3.5757827758789062, 1.236785888671875, 3.0764617919921875, -0.1128692626953125, 2.6625404357910156, 4.222991943359375, 1.9717578887939453, -2.99066162109375, 3.5142669677734375, 0.36981964111328125, 0.3129730224609375, 2.187255859375, 2.2740821838378906, 2.1054916381835938, 0.5083484649658203, 1.0560455322265625, 0.8237380981445312, 0.87933349609375, 0.6257553100585938, 0.7899494171142578, 1.4399585723876953, -0.5140304565429688, 3.5303421020507812, 1.2666168212890625, 0.3843345642089844, -0.6335887908935547, -0.38869476318359375, 1.6092948913574219, 4.7530059814453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000041.npy"}
|
|
{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.601499319076538, "std": 2.482137680053711, "min": -1.5631637573242188, "p10": 0.058539962768554825, "median": 2.057371139526367, "p90": 6.06165008544922, "max": 11.2684326171875, "pos_frac": 0.90625, "sample": [1.9669952392578125, -0.00115203857421875, 1.7317523956298828, 10.653854370117188, 11.2684326171875, 2.5374755859375, 0.37439727783203125, 5.76739501953125, 2.337158203125, 3.0301742553710938, 1.8641357421875, 6.225105285644531, 2.8072967529296875, 7.89630126953125, 0.8184986114501953, 1.442626953125, 2.7356185913085938, -0.02478790283203125, 2.8195877075195312, 3.1453781127929688, 1.3118820190429688, 1.2561054229736328, 4.304420471191406, 0.28687286376953125, 0.9688262939453125, 3.2270278930664062, -0.207489013671875, 2.496551513671875, 2.087200164794922, 0.0004119873046875, 2.5218162536621094, 5.693450927734375, 0.4211750030517578, 6.57672119140625, 2.5728988647460938, 1.6790771484375, 0.6576709747314453, 1.343667984008789, 2.0275421142578125, -0.7807388305664062, 4.0256195068359375, -0.3193817138671875, 1.61328125, 6.1877593994140625, 1.7148666381835938, 4.3771514892578125, 1.2054615020751953, 0.35773468017578125, 1.2013053894042969, 1.7784423828125, 0.8672676086425781, 6.2259521484375, -1.5631637573242188, 0.5750160217285156, 3.0130233764648438, 3.8147430419921875, 0.19417190551757812, 2.322845458984375, 4.8377532958984375, 2.5758819580078125, 3.9627227783203125, 5.01129150390625, 0.9318904876708984, 3.74298095703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000042.npy"}
|
|
{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.4315857887268066, "std": 1.9878431558609009, "min": -0.9192047119140625, "p10": 0.3126018524169922, "median": 2.2747802734375, "p90": 4.6076416015625, "max": 10.165512084960938, "pos_frac": 0.953125, "sample": [0.3265419006347656, 0.3136405944824219, 0.20690155029296875, 3.4606399536132812, 3.878173828125, 0.3504638671875, 4.395374298095703, 2.6136322021484375, 2.916301727294922, 6.545562744140625, 0.6748199462890625, 1.9992637634277344, 0.7799797058105469, 2.179962158203125, 2.826976776123047, 0.7228164672851562, 3.50439453125, 5.715736389160156, 1.9869003295898438, 0.3348522186279297, 6.4254150390625, 2.9957275390625, 5.494415283203125, 0.96392822265625, 2.9113845825195312, 2.546783447265625, 0.002582550048828125, 2.4062957763671875, -0.0057277679443359375, 4.6342010498046875, 1.5045547485351562, 2.633005142211914, 3.7610015869140625, 2.732025146484375, 2.0943527221679688, 3.826770782470703, 0.31215667724609375, 2.369598388671875, 1.8466911315917969, 0.31664276123046875, 1.7584075927734375, 1.835693359375, 6.816322326660156, 3.75164794921875, 0.8848953247070312, -0.9192047119140625, 1.0814075469970703, 2.87078857421875, 2.1641082763671875, 2.81304931640625, 2.4779396057128906, 3.4890518188476562, 2.1457901000976562, 0.5422458648681641, 1.9230804443359375, 10.165512084960938, 3.875946044921875, 1.3415031433105469, -0.18816757202148438, 0.5926780700683594, 2.5937728881835938, 4.5456695556640625, 3.2963027954101562, 0.25830078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000043.npy"}
|
|
{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.852534294128418, "std": 2.277489423751831, "min": -0.7397136688232422, "p10": 0.5274934768676758, "median": 2.4478912353515625, "p90": 5.83011474609375, "max": 10.12384033203125, "pos_frac": 0.953125, "sample": [4.01739501953125, 2.66168212890625, 1.6634235382080078, 0.5929775238037109, 7.108211517333984, 2.6771011352539062, 3.4503326416015625, 1.5716552734375, 2.3618392944335938, 5.6745758056640625, 6.6666717529296875, 0.13580322265625, 4.553436279296875, 5.3524627685546875, 5.402008056640625, 3.5977096557617188, 0.7060127258300781, 2.470550537109375, 2.3220996856689453, 5.6399383544921875, 0.5243206024169922, 3.2138671875, 2.5918216705322266, 7.81640625, 2.077484130859375, 3.21636962890625, 0.587158203125, 1.4232330322265625, 5.8967742919921875, 1.5241336822509766, 3.3856124877929688, -0.18680381774902344, 1.7420539855957031, 1.1157150268554688, 1.4707927703857422, 3.3350830078125, 0.8595733642578125, 1.667633056640625, 5.62579345703125, 2.42523193359375, 0.5348968505859375, 2.1155548095703125, 3.7483062744140625, 3.9282913208007812, 1.4972801208496094, 3.2861328125, 2.8569488525390625, 2.489288330078125, 0.5390338897705078, 0.1945343017578125, 7.049224853515625, 4.408111572265625, 1.3841552734375, 3.6110992431640625, 1.3681411743164062, 10.12384033203125, 0.7006416320800781, -0.3931121826171875, 3.899139404296875, 1.6872673034667969, -0.7397136688232422, 0.13648223876953125, 1.2730712890625, 7.925453186035156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000044.npy"}
|
|
{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.3724491596221924, "std": 2.6939775943756104, "min": -1.4713287353515625, "p10": -0.13217582702636715, "median": 1.758474349975586, "p90": 6.253539657592774, "max": 12.83599853515625, "pos_frac": 0.875, "sample": [3.58319091796875, 1.4715728759765625, -0.14655303955078125, 2.6266212463378906, 0.3130455017089844, 1.607248306274414, 4.152130126953125, 6.8252105712890625, 4.79595947265625, 3.5864410400390625, 0.6172637939453125, 8.017837524414062, 3.715005874633789, 0.4323310852050781, 0.0055179595947265625, 1.8329811096191406, 0.1092376708984375, 1.8689193725585938, 0.09159660339355469, 6.288578033447266, -0.10698699951171875, 0.37660789489746094, 2.9287261962890625, 0.6684379577636719, 0.6553153991699219, 5.788209915161133, 6.685188293457031, 4.4320526123046875, 0.2926158905029297, 0.450164794921875, 5.8739013671875, 2.865020751953125, 6.171783447265625, 0.2128753662109375, 0.0171661376953125, 1.7624969482421875, 6.389434814453125, 3.9784698486328125, -0.3838348388671875, 1.8008041381835938, 1.5559558868408203, 0.39972686767578125, -1.0993423461914062, -1.4713287353515625, 1.198089599609375, 12.83599853515625, 1.4975357055664062, 7.314613342285156, 3.4618682861328125, -1.029510498046875, 2.3369712829589844, 2.22344970703125, 1.7544517517089844, 0.45514869689941406, 1.26446533203125, 5.9888458251953125, 1.01239013671875, 2.443450927734375, 2.274463653564453, 2.2972869873046875, -0.14297103881835938, 3.4252548217773438, 0.5884552001953125, -1.4011077880859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000045.npy"}
|
|
{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.9789419174194336, "std": 3.2554383277893066, "min": -2.84442138671875, "p10": -0.022462081909179647, "median": 2.100412368774414, "p90": 7.0821464538574235, "max": 15.3477783203125, "pos_frac": 0.890625, "sample": [0.7752094268798828, 0.138397216796875, 5.169403076171875, 7.772430419921875, 3.8030853271484375, -2.4099044799804688, 1.357269287109375, 1.609201431274414, 7.247314453125, 3.489034652709961, 6.460109710693359, 1.659860610961914, 0.304779052734375, 2.2546768188476562, 4.920654296875, 0.1450958251953125, 0.06927490234375, 0.6449012756347656, 4.557075500488281, 12.336578369140625, -0.4546031951904297, 0.9861297607421875, -0.039340972900390625, 3.3837966918945312, 0.38254547119140625, 4.3345184326171875, 3.0732574462890625, 1.7240543365478516, 4.001091003417969, 2.2997798919677734, 4.6520233154296875, 0.4299468994140625, -2.84442138671875, 1.0653343200683594, 3.5968780517578125, 6.267127990722656, 5.495391845703125, -0.14467620849609375, 8.203399658203125, 0.3986358642578125, 0.555206298828125, 3.901092529296875, 5.470027923583984, -0.16904449462890625, 15.3477783203125, 0.0169219970703125, 6.085296630859375, 6.696754455566406, 0.7569789886474609, 2.7148208618164062, 1.9461479187011719, 8.4525146484375, 3.378023147583008, 0.06207275390625, 1.8302154541015625, 7.673713684082031, 1.2691917419433594, 2.5519752502441406, 3.2732467651367188, 5.463691711425781, 1.8579292297363281, 0.9894695281982422, 1.6287059783935547, -0.21576690673828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000046.npy"}
|
|
{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 2.989128351211548, "std": 2.697270154953003, "min": -2.40863037109375, "p10": 0.13678665161132814, "median": 2.2779693603515625, "p90": 6.631605529785157, "max": 10.777679443359375, "pos_frac": 0.9375, "sample": [2.24169921875, 3.0643997192382812, 0.24224853515625, 1.4349327087402344, 4.798824310302734, 1.5402755737304688, 0.5742950439453125, 1.4529571533203125, 6.9172210693359375, 2.922046661376953, 0.14694976806640625, 6.493743896484375, 6.400489807128906, 1.1474838256835938, 3.498638153076172, 2.6165618896484375, 0.07608795166015625, 1.571319580078125, 2.6593475341796875, 5.308662414550781, 6.46966552734375, 1.1033554077148438, 0.9250068664550781, 3.360748291015625, 3.8559417724609375, 1.6660537719726562, -1.4724578857421875, 6.6906890869140625, 7.930908203125, 2.8697509765625, -0.2588691711425781, 3.4453887939453125, 6.186496734619141, 4.37396240234375, -2.40863037109375, 0.018007278442382812, 0.48497772216796875, 1.4743919372558594, 1.2667312622070312, 9.56488037109375, 6.2013397216796875, 2.169157028198242, 1.5433082580566406, 3.65582275390625, 9.738327026367188, 6.75177001953125, 0.5062713623046875, 3.212339401245117, 10.777679443359375, -0.17183685302734375, 1.67523193359375, 2.043182373046875, 0.1324310302734375, 1.6487751007080078, 2.314239501953125, 1.2444229125976562, 5.113487243652344, 1.58404541015625, 3.4272308349609375, 2.049530029296875, 4.549468994140625, 3.924224853515625, 2.200695037841797, 2.3578853607177734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000047.npy"}
|
|
{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 3.1348774433135986, "std": 3.2677438259124756, "min": -7.3095855712890625, "p10": 0.15635910034179698, "median": 2.338470458984375, "p90": 7.89001922607422, "max": 13.0052490234375, "pos_frac": 0.90625, "sample": [4.0988311767578125, 0.6247406005859375, 8.033523559570312, 8.482421875, 7.1812286376953125, 2.9989547729492188, -0.7362213134765625, 2.4246749877929688, 5.610984802246094, 4.664924621582031, 1.9001312255859375, 6.071044921875, 2.4748764038085938, 0.397186279296875, 0.11357879638671875, 1.5762710571289062, 7.55517578125, -0.11893463134765625, 0.6042098999023438, -0.060276031494140625, 0.5604438781738281, 8.696929931640625, -0.0504608154296875, 2.2156448364257812, 6.9674072265625, 1.8186264038085938, 3.7718887329101562, 3.4219932556152344, 0.47559356689453125, 3.6694679260253906, 1.7778968811035156, 1.73968505859375, 3.98455810546875, 1.9150543212890625, 9.883010864257812, 3.8197269439697266, 4.8498382568359375, 10.559226989746094, 3.6341552734375, 1.2785377502441406, 1.0196762084960938, 1.3317203521728516, 1.1964263916015625, 2.9636688232421875, 0.2561798095703125, 4.270198822021484, 3.3561763763427734, 1.8563156127929688, -2.172119140625, 8.40237045288086, 3.024667739868164, 3.5534210205078125, 1.857696533203125, 13.0052490234375, 4.662864685058594, -7.3095855712890625, 5.954196929931641, 1.7732009887695312, 0.6534805297851562, 0.8284759521484375, 5.4233551025390625, 1.6843032836914062, 1.9013938903808594, 2.2522659301757812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000048.npy"}
|
|
{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.8097660541534424, "std": 3.9657328128814697, "min": -3.8554840087890625, "p10": -0.020603561401367147, "median": 3.3138208389282227, "p90": 8.56391830444336, "max": 16.633575439453125, "pos_frac": 0.890625, "sample": [3.088245391845703, 0.6070213317871094, 0.4314002990722656, 3.0148391723632812, 3.0460853576660156, 5.750453948974609, 1.3576431274414062, 2.578632354736328, -1.8564300537109375, 0.7074050903320312, 0.2243061065673828, 2.2477245330810547, 3.1574172973632812, 9.661598205566406, 7.452972412109375, 3.5171966552734375, 4.184825897216797, 2.2560977935791016, 4.188819885253906, 1.3590469360351562, 2.7183189392089844, -3.1162776947021484, 3.8541717529296875, -0.03814506530761719, 14.397705078125, 10.263992309570312, 1.300323486328125, 5.7655487060546875, 3.5757904052734375, 0.6400547027587891, 8.188758850097656, 16.633575439453125, 4.7365875244140625, 1.1790924072265625, -0.99627685546875, -3.8554840087890625, 4.2640380859375, 3.781679153442383, 4.620292663574219, 4.858604431152344, 3.2798023223876953, 0.020326614379882812, -0.718536376953125, 4.656181335449219, 5.3984832763671875, 14.69317626953125, 2.528350830078125, 2.6305007934570312, 8.724700927734375, 7.2517242431640625, 6.9813079833984375, 3.34783935546875, 4.441993713378906, 5.3188323974609375, 12.95880126953125, 0.7202396392822266, -1.25335693359375, 3.6759109497070312, 1.8859176635742188, 3.9597091674804688, 5.4315948486328125, 1.6480140686035156, 4.907680511474609, 1.5881805419921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000049.npy"}
|
|
{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 4.163365364074707, "std": 4.100195407867432, "min": -3.2452163696289062, "p10": -0.5954093933105468, "median": 3.2005043029785156, "p90": 9.539332580566409, "max": 15.543975830078125, "pos_frac": 0.828125, "sample": [7.764556884765625, -0.0117645263671875, -2.35162353515625, 10.534835815429688, 1.3905677795410156, 3.0562591552734375, 8.004684448242188, 2.2074203491210938, 4.125370025634766, 1.8606500625610352, -0.9208221435546875, 7.5594482421875, 3.8653335571289062, 15.543975830078125, 0.3423919677734375, 2.453643798828125, 8.069351196289062, 3.2982635498046875, 9.8170166015625, 4.346435546875, 0.7199478149414062, 5.714630126953125, 10.304641723632812, 2.897216796875, 2.1210403442382812, 5.5832366943359375, -0.06096649169921875, -0.9180488586425781, 3.1027450561523438, 8.891403198242188, 6.8190765380859375, 5.540641784667969, 10.25747299194336, 15.367996215820312, -3.2452163696289062, 0.5849342346191406, 1.3140335083007812, 1.8939590454101562, 0.30876731872558594, 3.74755859375, 2.2046432495117188, 1.6460647583007812, 6.580528259277344, 4.6412811279296875, 7.977657318115234, 6.703239440917969, -0.5126571655273438, 1.9923439025878906, 5.5814361572265625, -0.6308746337890625, 6.053619384765625, 2.7764663696289062, 4.6439666748046875, 6.066307067871094, 2.6397247314453125, 6.785911560058594, 8.012046813964844, 2.1954803466796875, 13.145988464355469, -0.7833480834960938, 7.242420196533203, 1.6661300659179688, -1.941375732421875, -0.1327037811279297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000050.npy"}
|
|
{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 4.017845153808594, "std": 5.204706192016602, "min": -7.0954742431640625, "p10": -0.4073829650878906, "median": 2.8037109375, "p90": 10.65169372558594, "max": 20.295257568359375, "pos_frac": 0.828125, "sample": [9.928932189941406, 4.169303894042969, 0.8243083953857422, -0.9639854431152344, 0.76055908203125, 0.223846435546875, 9.613632202148438, -0.6189002990722656, 1.8452224731445312, -0.1319427490234375, 12.23846435546875, 7.573680877685547, 2.1122589111328125, 4.631034851074219, 2.3469600677490234, 5.044281005859375, 4.470844268798828, -1.2087554931640625, 10.961448669433594, 5.655216217041016, 4.145425796508789, -0.408905029296875, -0.03413963317871094, 3.45404052734375, 1.0870513916015625, 7.640483856201172, 0.7507095336914062, 5.995048522949219, 0.8511543273925781, 5.6573944091796875, 20.295257568359375, 3.0325698852539062, 0.4062461853027344, 5.4130096435546875, 3.2634124755859375, 15.432525634765625, 1.757537841796875, 15.356582641601562, 5.161293029785156, 2.1188182830810547, 2.804412841796875, 1.1265373229980469, 4.170642852783203, 9.211692810058594, 1.0135231018066406, 0.7321205139160156, 5.7174530029296875, -4.633941650390625, -0.27681922912597656, 19.167022705078125, 14.530197143554688, 7.69049072265625, 7.910335540771484, -0.40383148193359375, 1.5182609558105469, 6.471977233886719, 0.0061054229736328125, 0.28725433349609375, 2.411834716796875, -7.0954742431640625, 2.8879776000976562, -1.77227783203125, 2.803009033203125, 0.0116729736328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000051.npy"}
|
|
{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 6.196599006652832, "std": 5.214123725891113, "min": -2.4729690551757812, "p10": 1.103017807006836, "median": 4.75640869140625, "p90": 13.493393707275391, "max": 22.800430297851562, "pos_frac": 0.921875, "sample": [1.6666145324707031, 13.226455688476562, 4.645111083984375, 12.957672119140625, 7.5622406005859375, 10.993743896484375, 3.1026344299316406, 7.26751708984375, 7.688873291015625, 14.18267822265625, -0.38004302978515625, 13.607795715332031, 4.6814727783203125, 5.558067321777344, 9.485116958618164, 8.359121322631836, 3.063495635986328, 5.454904556274414, 18.136192321777344, 9.569480895996094, 3.7314109802246094, 1.9317474365234375, 13.194183349609375, 3.112506866455078, 0.16307830810546875, 7.941947937011719, 3.2969512939453125, 2.004636764526367, 9.066267013549805, 22.800430297851562, 9.465850830078125, 1.5529823303222656, 11.030197143554688, 4.5737762451171875, 4.356189727783203, 14.010101318359375, 4.117992401123047, -1.255950927734375, 4.8313446044921875, -2.3230514526367188, -2.4729690551757812, 2.356597900390625, 6.746679306030273, 5.900917053222656, 2.095684051513672, 9.269775390625, 4.251686096191406, 14.15557861328125, 7.321247100830078, 1.6957664489746094, 1.0676155090332031, 3.73291015625, 3.787464141845703, 5.433628082275391, 2.9558563232421875, 9.917217254638672, 6.301525115966797, 1.1856231689453125, 18.886131286621094, -1.3941497802734375, 6.4700164794921875, 4.414726257324219, 2.579439163208008, 1.4916210174560547], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000052.npy"}
|
|
{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 6.325778007507324, "std": 6.903669357299805, "min": -3.0708999633789062, "p10": 0.523998260498047, "median": 4.254323959350586, "p90": 16.2981315612793, "max": 30.56524658203125, "pos_frac": 0.921875, "sample": [-1.9428482055664062, 4.151115417480469, 3.4553680419921875, 3.264801025390625, 1.0823631286621094, 3.7176342010498047, 13.732398986816406, 0.292694091796875, 8.293594360351562, -1.8850250244140625, 2.695526123046875, 29.661224365234375, -2.9641075134277344, 0.6371383666992188, 7.182159423828125, 7.773929595947266, 1.8636322021484375, 11.982040405273438, 3.0870437622070312, 12.725715637207031, 4.557991027832031, 6.1599273681640625, 13.625350952148438, 7.0848236083984375, 17.195030212402344, 1.6130905151367188, 5.008049011230469, 0.86798095703125, 5.729450225830078, 1.8971939086914062, 19.31365966796875, 9.107707977294922, 7.673175811767578, 8.754920959472656, 3.7675132751464844, 0.494232177734375, 0.5934524536132812, 4.244075775146484, 1.0302696228027344, 4.548887252807617, 4.2645721435546875, 8.029268264770508, 1.8793220520019531, 15.376426696777344, -3.0708999633789062, 17.398223876953125, 19.44085693359375, 3.2904052734375, 5.3011627197265625, 3.5430831909179688, 1.9848213195800781, 4.456943511962891, 2.698822021484375, 1.3422698974609375, 3.6613845825195312, 4.546802520751953, 3.0874576568603516, 30.56524658203125, 2.805889129638672, 16.323333740234375, 4.954368591308594, 5.1036376953125, 16.23932647705078, -0.4460926055908203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000053.npy"}
|
|
{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 5.348155975341797, "std": 5.101955890655518, "min": -1.217885971069336, "p10": 0.31595420837402344, "median": 4.3403520584106445, "p90": 11.947212982177735, "max": 22.118820190429688, "pos_frac": 0.9375, "sample": [8.4288330078125, 5.90570068359375, 8.705440521240234, 0.841827392578125, 0.7428035736083984, 3.294189453125, 0.3109016418457031, 2.7368030548095703, 6.350318908691406, 8.077495574951172, 0.9618701934814453, 6.3130035400390625, 12.673927307128906, 1.9949188232421875, 4.2920989990234375, 11.761909484863281, -0.04087257385253906, 5.216611862182617, 5.663694381713867, 5.5718994140625, -0.010646820068359375, 0.7324752807617188, 17.35938262939453, 0.3277435302734375, 7.993614196777344, 0.11254119873046875, 5.057926177978516, 4.6060333251953125, 6.2200927734375, 6.836570739746094, 1.7481555938720703, -0.3606376647949219, 8.341781616210938, 0.10908889770507812, 1.0015220642089844, 3.3203697204589844, 11.869117736816406, 2.8109054565429688, 5.2220916748046875, 13.555572509765625, 1.7007427215576172, 2.6218795776367188, 22.118820190429688, 10.7506103515625, -1.217885971069336, 3.2128849029541016, 0.3652381896972656, 13.889404296875, 0.7051410675048828, 4.54888916015625, 1.4121170043945312, 4.388605117797852, 11.570770263671875, 1.8563766479492188, 2.6375579833984375, 19.296661376953125, 1.0257987976074219, 5.384105682373047, 2.1015243530273438, 11.980682373046875, 2.5618972778320312, 10.412841796875, 1.2961406707763672, 11.004081726074219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000054.npy"}
|
|
{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 6.541542053222656, "std": 7.597686767578125, "min": -11.797103881835938, "p10": -1.5170097351074217, "median": 5.444938659667969, "p90": 16.815843200683595, "max": 28.58837890625, "pos_frac": 0.8125, "sample": [7.281639099121094, 10.504524230957031, 5.502403259277344, -5.346221923828125, 26.084747314453125, -2.1622848510742188, 8.087364196777344, 4.341133117675781, 17.565200805664062, -1.0333938598632812, 6.822357177734375, 6.330863952636719, 2.3822097778320312, 11.442985534667969, 20.837860107421875, 9.856155395507812, 5.327484130859375, 9.88970947265625, 28.58837890625, 3.5264129638671875, 11.62359619140625, -1.70001220703125, 4.755226135253906, 9.048210144042969, 24.932464599609375, 0.08758735656738281, 17.093948364257812, 0.10195159912109375, 0.7790966033935547, 8.85848617553711, 4.949333190917969, -1.898468017578125, 6.703174591064453, 0.05930137634277344, 13.02178955078125, 5.0479278564453125, -1.6079902648925781, -0.24721527099609375, 0.19149017333984375, -5.2381134033203125, 10.432605743408203, 3.8825740814208984, 2.1756134033203125, 15.829841613769531, 6.7635345458984375, -1.3047218322753906, 0.6938514709472656, 11.474334716796875, 5.315032958984375, -11.797103881835938, 3.175861358642578, 16.16693115234375, 7.1371002197265625, 4.191143035888672, 7.831695556640625, 5.847484588623047, -0.9326820373535156, 8.785873413085938, 7.852783203125, 14.299148559570312, 5.387474060058594, -0.160736083984375, 5.1598358154296875, 18.05988311767578], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000055.npy"}
|
|
{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 6.195199012756348, "std": 7.477856636047363, "min": -10.192279815673828, "p10": -1.8515127182006832, "median": 5.262235641479492, "p90": 16.39516601562501, "max": 28.020172119140625, "pos_frac": 0.8125, "sample": [-2.94073486328125, 7.6698150634765625, -3.589263916015625, 8.600120544433594, -0.5028171539306641, -10.192279815673828, 11.347419738769531, -1.4351520538330078, 3.7860031127929688, -3.959278106689453, 3.6470947265625, 1.8753242492675781, 5.136863708496094, 17.395233154296875, 2.4052200317382812, 0.06356430053710938, -0.22560691833496094, 11.970802307128906, 10.635517120361328, 8.250892639160156, -2.0299530029296875, 11.769302368164062, 3.3399505615234375, 24.072906494140625, 9.2095947265625, 17.235382080078125, 1.1224498748779297, 0.21138763427734375, 9.87367057800293, 2.8678455352783203, 7.914276123046875, 7.667194366455078, 9.835807800292969, 1.6998538970947266, 11.368873596191406, 12.346988677978516, 0.20641708374023438, 19.853790283203125, 7.603790283203125, 6.515388488769531, 6.035297393798828, 5.21026611328125, 3.0262317657470703, 21.199371337890625, 7.68359375, 13.377632141113281, 6.410650253295898, 11.953414916992188, 0.4239959716796875, 28.020172119140625, 1.6940250396728516, -0.021638870239257812, 4.160865783691406, 14.434661865234375, -8.06903076171875, 3.307069778442383, -0.683319091796875, 21.918487548828125, 5.314205169677734, -3.6510467529296875, 3.2399673461914062, 2.1170578002929688, 6.3523101806640625, 10.414840698242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000056.npy"}
|
|
{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 6.902283668518066, "std": 6.7265801429748535, "min": -11.818206787109375, "p10": 0.10590305328369173, "median": 6.822113037109375, "p90": 15.08910140991211, "max": 23.783004760742188, "pos_frac": 0.890625, "sample": [8.943416595458984, 3.328969955444336, 0.5268974304199219, -3.0135498046875, 11.4873046875, -11.818206787109375, 11.997161865234375, 0.4315299987792969, 14.207122802734375, 6.78643798828125, 17.75848388671875, 4.001762390136719, 23.783004760742188, 3.2599029541015625, 7.541831970214844, 7.158233642578125, -1.4504528045654297, 3.8139991760253906, 7.037223815917969, 21.265396118164062, 10.906257629394531, 18.334671020507812, 12.38031005859375, 4.338268280029297, 7.421913146972656, 1.482290267944336, 15.221923828125, 5.444917678833008, 4.324028015136719, 2.4723968505859375, 1.07672119140625, 4.292692184448242, 3.332061767578125, -0.03365135192871094, 6.8577880859375, 8.260345458984375, -0.44620513916015625, 1.4251670837402344, 2.882425308227539, 8.919845581054688, 13.268081665039062, 2.134553909301758, 10.514450073242188, 6.21826171875, 9.826156616210938, -2.9061317443847656, 9.559921264648438, 7.3890380859375, 9.27667236328125, 3.5583953857421875, 2.4605846405029297, 19.942535400390625, 5.659290313720703, 3.89678955078125, 7.4914703369140625, 4.351707458496094, 0.7591934204101562, 12.883522033691406, 13.269935607910156, -6.981513977050781, 14.779182434082031, 22.184860229492188, 7.955556869506836, 8.313026428222656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000057.npy"}
|
|
{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 7.606607437133789, "std": 8.165129661560059, "min": -10.152931213378906, "p10": -0.5155685424804681, "median": 5.919193267822266, "p90": 18.29417572021485, "max": 29.246353149414062, "pos_frac": 0.890625, "sample": [15.936080932617188, -2.107076644897461, 0.46447181701660156, 5.265361785888672, 3.3338470458984375, 4.318328857421875, 21.831947326660156, 2.0912857055664062, 15.412361145019531, 3.0327224731445312, 6.765773773193359, 6.7300262451171875, 8.333450317382812, 4.064970016479492, 2.7307357788085938, 12.924434661865234, 11.371528625488281, 9.950546264648438, 4.890159606933594, 3.325990676879883, 8.502288818359375, 15.701705932617188, 14.556747436523438, -1.3351974487304688, 12.424690246582031, 1.6355133056640625, 10.009265899658203, 6.796220779418945, 23.803817749023438, 4.107866287231445, 29.246353149414062, 8.890453338623047, 6.573024749755859, 0.03989410400390625, 16.563034057617188, 0.8000240325927734, 15.751693725585938, 4.458965301513672, 3.2061004638671875, -2.8338470458984375, 2.613506317138672, 3.0724658966064453, 25.879486083984375, -2.5120697021484375, 11.357452392578125, -0.7536239624023438, 1.0633659362792969, 7.3648834228515625, -6.42193603515625, 25.989349365234375, 2.0818042755126953, 6.9104766845703125, 0.8712863922119141, 15.47216796875, 11.456069946289062, -10.152931213378906, 22.6253662109375, 1.011749267578125, 2.54608154296875, 8.016685485839844, 16.96973419189453, 2.05126953125, 0.8828964233398438, 18.861793518066406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000058.npy"}
|
|
{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 8.48550796508789, "std": 8.816734313964844, "min": -8.84246826171875, "p10": -1.3104663848876945, "median": 7.137125015258789, "p90": 21.697581481933593, "max": 35.818206787109375, "pos_frac": 0.875, "sample": [9.58697509765625, 10.224563598632812, 17.913421630859375, 21.7598876953125, 17.642799377441406, 4.7587738037109375, 35.818206787109375, 22.840118408203125, 7.804483413696289, 18.497177124023438, 0.8656806945800781, 7.402362823486328, -4.748832702636719, 3.0130481719970703, 24.68505859375, -0.6302986145019531, 2.6098241806030273, 15.39691162109375, 6.70794677734375, 22.909713745117188, 1.3611087799072266, 2.5678863525390625, 1.4872970581054688, 1.467803955078125, 5.21112060546875, 28.801406860351562, 2.836650848388672, 2.077066421508789, 9.104904174804688, -8.84246826171875, 10.180953979492188, -1.6348533630371094, 9.957954406738281, 8.445625305175781, 9.740737915039062, 20.21930694580078, 3.763336181640625, 13.3072509765625, 8.230941772460938, 3.9709091186523438, -6.411796569824219, -4.133380889892578, 6.181549072265625, -1.6019668579101562, 4.93293571472168, 10.628211975097656, 7.51957893371582, 3.1489486694335938, 8.611701965332031, 2.960355758666992, 1.9563789367675781, 6.87188720703125, 3.203367233276367, 11.192935943603516, 6.52415657043457, 16.47795867919922, 10.906982421875, 4.64849853515625, 4.03240966796875, -2.7223129272460938, 19.35271453857422, 21.552200317382812, 7.731746673583984, 22.194732666015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000059.npy"}
|
|
{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 6.686439514160156, "std": 7.756203651428223, "min": -16.633148193359375, "p10": -2.1424306869506835, "median": 7.36073112487793, "p90": 13.988619232177735, "max": 27.509864807128906, "pos_frac": 0.828125, "sample": [8.459182739257812, -5.6185302734375, 2.118865966796875, 12.975883483886719, 1.2719917297363281, 3.520679473876953, 12.87717056274414, 8.716339111328125, 0.844146728515625, -16.633148193359375, 7.390598297119141, -2.1610107421875, 2.1602249145507812, 4.073949813842773, 12.620384216308594, 7.0929107666015625, 4.881439208984375, 12.982421875, 4.451698303222656, 11.508934020996094, 13.863868713378906, 14.527297973632812, 21.2705078125, 10.1064453125, 8.825698852539062, 8.378250122070312, -0.75140380859375, 7.674217224121094, 12.1988525390625, 0.13675689697265625, 11.348682403564453, 20.10779571533203, 4.405979156494141, 6.990631103515625, 8.83755111694336, -2.1577091217041016, 11.015617370605469, 8.973281860351562, 2.4088668823242188, -6.6862945556640625, 13.083454132080078, 3.523303985595703, 6.61083984375, 9.510330200195312, 4.969154357910156, -1.0380096435546875, -12.606441497802734, 18.08880615234375, 22.737442016601562, 12.7137451171875, 14.042083740234375, 3.1203460693359375, -2.44464111328125, -1.6628646850585938, 12.031322479248047, 7.330863952636719, 27.509864807128906, 10.76983642578125, 1.1280517578125, 10.330404281616211, -2.106781005859375, 9.217582702636719, 4.739740371704102, 1.3246726989746094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000060.npy"}
|
|
{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 8.251806259155273, "std": 11.42257308959961, "min": -11.391258239746094, "p10": -1.822412872314453, "median": 5.608554840087891, "p90": 20.739484405517583, "max": 63.036376953125, "pos_frac": 0.78125, "sample": [3.1688480377197266, 1.9271221160888672, -1.3568649291992188, 0.39003753662109375, 17.361358642578125, -0.26407814025878906, -0.042781829833984375, 8.683723449707031, -1.8062362670898438, 28.88922119140625, 25.151771545410156, 22.45538330078125, 29.17474365234375, 15.149360656738281, 9.170394897460938, 10.832115173339844, -1.6504974365234375, 1.5334415435791016, 12.748458862304688, 13.293136596679688, 3.524707794189453, 5.668952941894531, 18.053604125976562, 9.573131561279297, 6.9686279296875, 7.410074234008789, 5.5479278564453125, 6.68817138671875, 10.064018249511719, 9.608482360839844, -0.5720748901367188, -1.829345703125, 13.373146057128906, 19.712005615234375, 19.25, 25.47479248046875, 19.5472412109375, 1.4660224914550781, -2.07568359375, 14.155685424804688, 3.1213951110839844, 0.32712364196777344, 14.514205932617188, 3.10235595703125, 0.9226360321044922, 21.179832458496094, 1.11639404296875, 4.1058807373046875, 63.036376953125, 14.552391052246094, 14.373237609863281, -1.9690971374511719, -11.391258239746094, 13.955497741699219, 2.56646728515625, -5.9608306884765625, 5.54815673828125, 2.9368057250976562, -8.654556274414062, 3.227874755859375, 7.893798828125, -6.966220855712891, -1.4332275390625, 1.592193603515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000061.npy"}
|
|
{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 6.19963264465332, "std": 8.31264877319336, "min": -11.350799560546875, "p10": -3.7055643081665037, "median": 5.796904563903809, "p90": 16.826065063476566, "max": 33.854042053222656, "pos_frac": 0.84375, "sample": [9.83270263671875, 1.4239959716796875, 0.7509164810180664, 12.005378723144531, 5.72535514831543, 13.66326904296875, 8.314640045166016, 0.9784393310546875, 9.863471984863281, -6.0883636474609375, 16.232620239257812, 7.252204895019531, 1.912527084350586, -0.24002647399902344, 12.035133361816406, 4.286170959472656, 10.91436767578125, 6.337982177734375, 0.3525352478027344, 5.9503173828125, 7.160442352294922, 7.886486053466797, 1.7104110717773438, 5.916797637939453, -5.050811767578125, 9.915390014648438, 4.840488433837891, 9.35223388671875, -0.36562156677246094, 33.854042053222656, 18.686790466308594, 9.993247985839844, 7.109657287597656, 2.5462799072265625, 0.8797073364257812, 6.280693054199219, 1.5647048950195312, 20.399185180664062, -11.350799560546875, -4.63397216796875, -6.906494140625, 0.8267536163330078, 1.0824165344238281, 1.0285606384277344, 1.8219451904296875, 10.283502578735352, 17.080398559570312, 10.353076934814453, -3.7665767669677734, 17.720672607421875, 8.871448516845703, 5.77290153503418, -9.207595825195312, 4.221466064453125, 14.089447021484375, 3.8624191284179688, 3.708385467529297, 3.9838638305664062, 30.323471069335938, 22.871353149414062, 7.423004150390625, 0.8753700256347656, -3.563201904296875, 5.8209075927734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000062.npy"}
|
|
{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 8.366212844848633, "std": 9.040850639343262, "min": -14.1954345703125, "p10": -0.7275726318359369, "median": 7.814731597900391, "p90": 18.971240234375006, "max": 36.428436279296875, "pos_frac": 0.875, "sample": [27.537765502929688, 9.115875244140625, 5.067602157592773, 9.643909454345703, 4.070892333984375, 1.2651290893554688, 5.513023376464844, -8.974639892578125, 7.263755798339844, 7.500679016113281, -14.1954345703125, 17.767303466796875, 9.829444885253906, 7.956733703613281, 3.1802101135253906, -0.9778499603271484, 19.487213134765625, 4.712759017944336, 14.705413818359375, 22.502487182617188, 13.700302124023438, 15.498931884765625, 7.005851745605469, 0.0053195953369140625, -0.9775848388671875, 12.47987174987793, -4.421836853027344, 0.16168594360351562, 5.7081298828125, 3.499530792236328, 14.447860717773438, 9.549690246582031, 8.027030944824219, 10.434192657470703, 11.778709411621094, 13.388504028320312, 16.17015838623047, 15.241050720214844, 5.859004974365234, 2.080841064453125, -0.1442108154296875, 14.699722290039062, 3.056680679321289, 5.950401306152344, -12.831901550292969, 14.844053268432617, 8.828369140625, 1.498748779296875, 1.3251495361328125, 7.6727294921875, 2.9940643310546875, 16.336715698242188, 20.476577758789062, 8.750221252441406, 19.6114501953125, 0.9282150268554688, 9.955923080444336, 36.428436279296875, -4.0948028564453125, 16.280109405517578, 4.247901916503906, 28.8133544921875, 4.727272033691406, 12.472885131835938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000063.npy"}
|
|
{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 9.82172966003418, "std": 11.361506462097168, "min": -20.482070922851562, "p10": -1.27231216430664, "median": 8.22147274017334, "p90": 24.11358795166016, "max": 52.93353271484375, "pos_frac": 0.796875, "sample": [0.7645416259765625, 19.930511474609375, 30.181777954101562, 8.672168731689453, 5.603782653808594, 13.191261291503906, 3.056131362915039, 18.473716735839844, -0.3956279754638672, 7.7022705078125, 2.4156036376953125, 11.650726318359375, 0.4018440246582031, 5.7241058349609375, 13.440299987792969, 34.75785827636719, 5.309268951416016, -2.1266441345214844, 20.08959197998047, 8.443361282348633, 12.202720642089844, -3.1072463989257812, 31.946914672851562, 7.999584197998047, -0.43831443786621094, 6.726280212402344, -1.8232669830322266, 7.853208541870117, -20.482070922851562, -0.48636817932128906, 8.575286865234375, -1.535797119140625, 52.93353271484375, 6.312650680541992, 1.9304695129394531, 11.83144760131836, 19.733978271484375, 23.150863647460938, 11.544097900390625, 6.286231994628906, 6.44133186340332, 24.703598022460938, 13.561958312988281, 10.363922119140625, -0.46337890625, -10.950157165527344, 26.278717041015625, 3.7201385498046875, 24.52618408203125, -0.6860504150390625, 17.362869262695312, 13.945068359375, 14.346305847167969, -1.5235671997070312, 15.089469909667969, -0.35811424255371094, 5.352779388427734, 8.651317596435547, 15.255645751953125, 13.133346557617188, 1.6118850708007812, 13.490432739257812, 19.908035278320312, 6.388214111328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000064.npy"}
|
|
{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 9.843679428100586, "std": 11.074901580810547, "min": -25.3094482421875, "p10": -1.0071685791015623, "median": 8.31739616394043, "p90": 24.64920959472657, "max": 34.74913024902344, "pos_frac": 0.828125, "sample": [34.74913024902344, 27.152801513671875, 21.138275146484375, 13.221473693847656, 6.00031852722168, 10.749588012695312, 0.13719940185546875, 14.221458435058594, 3.990741729736328, 22.632789611816406, 17.42005157470703, 2.0376644134521484, -0.26520729064941406, 25.434539794921875, 31.332382202148438, 1.2137985229492188, 11.307369232177734, 2.691638946533203, -4.68206787109375, 5.663490295410156, 0.7986831665039062, 19.839126586914062, 7.526771545410156, -25.3094482421875, 18.574783325195312, 9.7593994140625, -4.703521728515625, 8.334121704101562, 15.765907287597656, 5.901782989501953, 5.652992248535156, 5.652973175048828, -2.541759490966797, 5.586214065551758, 32.05439758300781, 8.300670623779297, 22.741966247558594, 17.60519027709961, 27.51471710205078, 8.60921859741211, -1.0384521484375, -14.045135498046875, 10.420120239257812, 2.467214584350586, 9.029327392578125, 20.821273803710938, 27.161354064941406, -2.74017333984375, 5.78692626953125, 22.772865295410156, 13.290115356445312, 5.4484100341796875, 22.8167724609375, -0.934173583984375, 13.12701416015625, 9.96954345703125, 6.3671722412109375, 2.3686981201171875, 17.8892822265625, 9.903656005859375, -0.32607269287109375, 8.290752410888672, -0.07962989807128906, 7.416971206665039], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000065.npy"}
|
|
{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 8.99482536315918, "std": 11.073993682861328, "min": -11.431182861328125, "p10": -1.1466915130615234, "median": 7.3815813064575195, "p90": 20.896685409545903, "max": 52.07997131347656, "pos_frac": 0.84375, "sample": [8.553215026855469, 52.07997131347656, 0.3626251220703125, 16.919456481933594, 14.028610229492188, 13.430046081542969, 1.9204578399658203, -0.3387184143066406, 10.910287857055664, 8.04791259765625, -9.910400390625, 2.349740982055664, 7.401735305786133, -1.1923980712890625, 13.078594207763672, 4.0394134521484375, 5.131780624389648, 0.19786643981933594, 2.3186683654785156, 21.294815063476562, 8.670066833496094, 11.389934539794922, 0.5678138732910156, 11.295578002929688, 18.712947845458984, 2.1756210327148438, 5.651161193847656, -3.7758216857910156, -4.161325454711914, -11.431182861328125, 12.207710266113281, 18.538352966308594, 19.967716217041016, 8.608001708984375, 3.5211029052734375, 13.007171630859375, 6.677114486694336, -0.43767356872558594, 2.3387413024902344, 15.664749145507812, 11.594078063964844, 3.5649490356445312, -1.0400428771972656, 7.361427307128906, 6.306056976318359, -8.909004211425781, 10.333656311035156, 1.4815521240234375, -4.673492431640625, 14.993431091308594, 4.708518981933594, 22.81536865234375, 30.375961303710938, 23.669456481933594, 5.267913818359375, 19.055984497070312, 2.2236595153808594, 9.247783660888672, 17.36400604248047, 2.5066661834716797, 36.809844970703125, 1.9619255065917969, 34.15118408203125, 14.686492919921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000066.npy"}
|
|
{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 9.2771635055542, "std": 11.012858390808105, "min": -14.729263305664062, "p10": -1.2801784515380858, "median": 6.914272308349609, "p90": 23.022106170654297, "max": 51.16180419921875, "pos_frac": 0.828125, "sample": [24.300491333007812, 4.411960601806641, 22.61646270751953, -0.4908599853515625, 2.355318069458008, -0.3875732421875, 18.539535522460938, 5.632440567016602, -6.267997741699219, 2.2320938110351562, 18.586200714111328, 10.629295349121094, 4.647308349609375, 17.99786376953125, -0.6185150146484375, -4.72566032409668, 10.396743774414062, 10.912399291992188, 1.8139114379882812, -14.729263305664062, 23.195953369140625, 9.555938720703125, -1.1672706604003906, 0.26160430908203125, 6.18524169921875, 1.8563385009765625, 23.58246612548828, 7.712369918823242, -1.3285675048828125, 6.88494873046875, 2.2126922607421875, 16.64992904663086, 7.822971343994141, 3.111787796020508, 2.6628494262695312, 10.153060913085938, 15.885339736938477, 6.363672256469727, 1.5104446411132812, -5.8446197509765625, 6.661674499511719, 19.481582641601562, 4.894630432128906, 3.4858264923095703, 29.2427978515625, 15.710365295410156, 6.99476432800293, 19.607215881347656, 15.071918487548828, 16.02978515625, 12.895957946777344, 6.176609039306641, 12.668359756469727, 36.57611083984375, 6.943595886230469, 1.2806682586669922, 51.16180419921875, 3.081552505493164, -2.7832412719726562, 28.268600463867188, 22.06756591796875, 8.319194793701172, -4.1589202880859375, 8.940715789794922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000067.npy"}
|
|
{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 8.720624923706055, "std": 9.045540809631348, "min": -8.624038696289062, "p10": -0.11763000488281167, "median": 6.5119218826293945, "p90": 22.16165618896485, "max": 40.67176818847656, "pos_frac": 0.890625, "sample": [-5.8456268310546875, 2.764007568359375, 26.5841064453125, 9.898574829101562, 5.636892318725586, -6.6906280517578125, 9.841796875, 23.76605224609375, 1.7211990356445312, 0.699371337890625, 14.328201293945312, 15.611663818359375, 19.572540283203125, 12.844200134277344, 11.587394714355469, -0.8369979858398438, 6.905612945556641, 13.538127899169922, 4.444206237792969, 9.068382263183594, -1.9643020629882812, 5.220457077026367, 5.865777969360352, 2.554229736328125, 7.991203308105469, 12.729393005371094, 19.6685791015625, 8.504302978515625, 9.565719604492188, 20.557144165039062, 22.84930419921875, 24.149005889892578, 1.8281402587890625, 6.906272888183594, 10.749977111816406, 1.9155941009521484, -0.4677734375, 6.118230819702148, 2.7761154174804688, 5.546852111816406, 18.318004608154297, 2.4803466796875, 11.436309814453125, -8.624038696289062, 7.271034240722656, 5.441596984863281, 4.204460144042969, 23.12236785888672, 2.4542083740234375, 1.7357501983642578, 2.224336624145508, 40.67176818847656, 26.9986572265625, 17.87677001953125, 14.784896850585938, 1.0015716552734375, 2.5213623046875, 4.971330642700195, 12.061622619628906, -2.35223388671875, 4.620086669921875, 2.7425880432128906, 5.1573944091796875, 8.496490478515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000068.npy"}
|
|
{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 10.385248184204102, "std": 10.408534049987793, "min": -5.082916259765625, "p10": -0.6331836700439449, "median": 7.135639190673828, "p90": 22.245738220214847, "max": 48.047393798828125, "pos_frac": 0.875, "sample": [4.5736236572265625, 6.833095550537109, 32.725555419921875, 8.990036010742188, 12.889068603515625, 17.720619201660156, -5.082916259765625, -1.4947662353515625, -0.2591590881347656, 4.604209899902344, 5.68475341796875, 9.70804214477539, -1.269775390625, 10.119209289550781, 7.025363922119141, 15.703269958496094, 28.820907592773438, 7.721931457519531, 19.170379638671875, 16.175430297851562, -1.8064842224121094, 26.03289794921875, 13.575546264648438, 10.238750457763672, 20.43523406982422, 14.82257080078125, 4.369789123535156, 16.727691650390625, 13.278003692626953, 6.000825881958008, 0.5783805847167969, 0.3966560363769531, 20.175689697265625, 48.047393798828125, 4.860103607177734, 21.160736083984375, 5.93804931640625, 16.33984375, 19.141860961914062, 1.1719436645507812, 1.6961517333984375, 17.618881225585938, 11.188339233398438, 1.5008163452148438, -0.7934799194335938, 15.393966674804688, 7.245914459228516, 39.41881561279297, 4.34588623046875, 8.497011184692383, 6.671989440917969, -5.007900238037109, 6.395927429199219, 3.4221343994140625, 2.4389801025390625, 6.779998779296875, 23.138824462890625, 22.710739135742188, 1.79193115234375, 20.502281188964844, 1.9581832885742188, 1.3828849792480469, 5.768239974975586, -1.2549896240234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000069.npy"}
|
|
{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 11.182601928710938, "std": 11.99088191986084, "min": -10.09375, "p10": -1.1931610107421875, "median": 10.888038635253906, "p90": 26.964770507812506, "max": 44.717987060546875, "pos_frac": 0.84375, "sample": [6.648899078369141, 0.6630859375, 12.391780853271484, 20.24262237548828, 24.017181396484375, 12.816164016723633, -8.8916015625, -1.1382598876953125, 14.595161437988281, 4.349206924438477, -2.099578857421875, 13.702323913574219, -2.772329330444336, 9.451278686523438, 41.57307434082031, 1.9466896057128906, 11.024017333984375, 27.748260498046875, 11.957513809204102, 44.717987060546875, 12.367874145507812, 10.752059936523438, 2.0876083374023438, 32.75682067871094, 0.8314285278320312, 18.521644592285156, 20.798812866210938, 13.082284927368164, -1.8312129974365234, 7.598356246948242, 5.8943634033203125, 28.89966583251953, 1.8936729431152344, 0.9515113830566406, 0.9533462524414062, -0.6423912048339844, 14.81949234008789, 3.0190887451171875, 13.163436889648438, -1.8620681762695312, 3.709747314453125, 4.099020004272461, -1.2166900634765625, 1.69598388671875, 11.401216506958008, 1.8226318359375, 20.880203247070312, 25.136627197265625, -10.09375, 4.900054931640625, 19.0933837890625, 3.059040069580078, 1.9823188781738281, 35.497283935546875, 13.524539947509766, -0.9472694396972656, 37.2225341796875, 11.192581176757812, 22.2510986328125, 17.649736404418945, 3.8650150299072266, 24.46776580810547, 14.557197570800781, 22.936935424804688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000070.npy"}
|
|
{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 12.864418029785156, "std": 12.86440372467041, "min": -10.091415405273438, "p10": 0.5523014068603531, "median": 10.650421142578125, "p90": 24.473324584960945, "max": 53.0882568359375, "pos_frac": 0.890625, "sample": [-5.677177429199219, 45.21026611328125, 6.527332305908203, 2.1842498779296875, 6.698482513427734, 13.760391235351562, 6.713092803955078, 12.629180908203125, 17.69512176513672, 5.670404434204102, 22.327957153320312, 7.370109558105469, 18.722702026367188, 4.892284393310547, 3.2196884155273438, 7.453704833984375, 12.370410919189453, 16.642959594726562, -0.508331298828125, 2.050813674926758, 4.798332214355469, 46.67076110839844, 7.608085632324219, 7.421440124511719, 17.513290405273438, 17.144695281982422, 6.291477203369141, 12.821956634521484, 13.616107940673828, -9.043563842773438, 4.604957580566406, 13.751754760742188, 12.185211181640625, -0.08991813659667969, 5.2134857177734375, 18.977294921875, 19.827049255371094, -0.299774169921875, 22.832015991210938, 16.71442413330078, 5.597709655761719, 14.525505065917969, 7.493316650390625, 17.82537841796875, 17.21441650390625, 19.496185302734375, 3.8965301513671875, 39.36921691894531, 51.666534423828125, 53.0882568359375, 2.5680999755859375, 25.176742553710938, -10.091415405273438, -0.9131507873535156, 14.589157104492188, 7.071197509765625, 7.969058990478516, 10.883453369140625, 20.311065673828125, 17.537826538085938, 33.2581787109375, 3.3586349487304688, 6.500705718994141, 10.417388916015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000071.npy"}
|
|
{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 13.383831024169922, "std": 13.651796340942383, "min": -14.252212524414062, "p10": -2.1440643310546847, "median": 10.97158432006836, "p90": 33.22061004638674, "max": 57.700927734375, "pos_frac": 0.890625, "sample": [18.94432830810547, 15.761274337768555, 6.149936676025391, 13.631874084472656, 6.954780578613281, 3.894132614135742, 9.483245849609375, 28.656173706054688, -14.252212524414062, 26.053314208984375, 15.807476043701172, 14.942501068115234, 7.968442916870117, 0.624725341796875, 17.670146942138672, 10.139629364013672, 13.491828918457031, 27.1636962890625, 15.100410461425781, 35.045013427734375, 40.474609375, -3.6176509857177734, 7.802970886230469, 10.221647262573242, 17.38910675048828, 3.5687255859375, 19.097938537597656, 10.499588012695312, 11.443580627441406, 17.01996612548828, 4.034507751464844, 5.6187744140625, 12.099884033203125, -13.684127807617188, 37.57768249511719, 28.963668823242188, 20.23328399658203, 7.220733642578125, 23.577835083007812, -4.522270202636719, 9.065711975097656, 43.00144958496094, 57.700927734375, -6.9632110595703125, 10.004932403564453, 26.299819946289062, 7.9494781494140625, 8.446086883544922, 1.3062057495117188, 37.1641845703125, -10.70208740234375, 4.482418060302734, 2.7953414916992188, 18.510894775390625, 14.592414855957031, 19.608917236328125, 10.3311767578125, 16.487403869628906, 37.080902099609375, 12.31271743774414, 6.676822662353516, 3.6534500122070312, -3.3306884765625, 1.8387336730957031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000072.npy"}
|
|
{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 13.05124568939209, "std": 18.70128059387207, "min": -26.7335205078125, "p10": -8.59512481689453, "median": 10.424673080444336, "p90": 33.64949836730957, "max": 67.76078796386719, "pos_frac": 0.78125, "sample": [31.06812286376953, -12.874237060546875, 7.121650695800781, 15.861061096191406, 7.351163864135742, 36.09545135498047, -6.239105224609375, 26.195053100585938, 7.50360107421875, -1.1484756469726562, 29.3570556640625, 33.393009185791016, 24.217063903808594, -1.2075042724609375, 0.8258514404296875, -15.222824096679688, 57.653778076171875, -1.9357223510742188, 10.314929962158203, 18.939170837402344, 17.98255157470703, 8.185462951660156, 10.534416198730469, 1.2009639739990234, 2.090961456298828, 10.702888488769531, -17.485050201416016, 11.099388122558594, 31.335357666015625, 15.415573120117188, 13.15240478515625, 2.9539566040039062, 57.793212890625, -11.7041015625, 5.160968780517578, 31.015647888183594, 32.12598419189453, -9.978256225585938, 4.205976486206055, 2.922565460205078, -1.9307403564453125, -5.514427185058594, -9.453536987304688, 6.962429046630859, 16.076744079589844, 16.592864990234375, 3.037233352661133, 5.639547348022461, 21.372215270996094, 43.61058044433594, 15.15081787109375, 6.468036651611328, 23.195343017578125, 6.437843322753906, 14.53070068359375, -6.5921630859375, 15.063873291015625, 55.92884826660156, 25.744094848632812, 67.76078796386719, 3.175203323364258, 33.759422302246094, 19.017593383789062, -26.7335205078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000073.npy"}
|
|
{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 13.770397186279297, "std": 20.495498657226562, "min": -40.672569274902344, "p10": -2.940638351440428, "median": 10.703973770141602, "p90": 31.936056518554697, "max": 119.9168701171875, "pos_frac": 0.8125, "sample": [27.11663818359375, 9.399299621582031, 12.487579345703125, 0.043552398681640625, 24.663726806640625, 7.354854583740234, 9.921146392822266, 55.74651336669922, 2.000995635986328, 13.633766174316406, 14.201881408691406, 10.297403335571289, 19.799091339111328, 1.387725830078125, 16.985626220703125, 119.9168701171875, 8.772201538085938, 29.798362731933594, 14.440227508544922, 2.8229293823242188, 2.157583236694336, 49.82801818847656, 3.0081710815429688, 32.852210998535156, 26.218490600585938, 10.72967529296875, -3.523681640625, 1.3299102783203125, -0.5428066253662109, -19.51378631591797, 15.958026885986328, -1.5802040100097656, -0.12471771240234375, 8.437385559082031, -40.672569274902344, 21.212562561035156, -1.55316162109375, 25.498714447021484, 5.174890518188477, -3.7998390197753906, -9.825645446777344, 11.571342468261719, 19.38848876953125, 25.784393310546875, -0.872467041015625, 44.8765869140625, 13.721504211425781, 4.0574798583984375, 8.734298706054688, 8.61346435546875, 10.678272247314453, 35.887176513671875, 19.149139404296875, 21.989105224609375, 38.39556121826172, 16.909225463867188, 5.457183837890625, 16.006637573242188, 25.427993774414062, 27.156890869140625, 0.6084938049316406, -3.7906951904296875, -3.8332443237304688, 13.328998565673828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000074.npy"}
|
|
{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 19.06302833557129, "std": 18.70346450805664, "min": -10.99371337890625, "p10": -2.3711032867431623, "median": 15.522804260253906, "p90": 46.05704116821289, "max": 70.86912536621094, "pos_frac": 0.875, "sample": [-0.4564399719238281, 22.511688232421875, 16.36800765991211, 14.022758483886719, 18.336341857910156, 13.303258895874023, 9.796661376953125, 6.406715393066406, 42.080841064453125, 61.5867919921875, 30.483306884765625, 14.601274490356445, -6.168113708496094, 27.623199462890625, 70.86912536621094, 14.66622543334961, 37.5938720703125, 59.82606506347656, 9.29452133178711, 0.021558761596679688, 42.171669006347656, 23.513290405273438, 15.16415786743164, 10.769187927246094, 18.389373779296875, 45.88910675048828, 46.12901306152344, 3.188322067260742, 8.619754791259766, 29.22915267944336, 17.949371337890625, 16.480228424072266, 5.005775451660156, 62.66014099121094, -4.7981719970703125, -3.1916732788085938, 25.195556640625, -10.99371337890625, 19.627769470214844, 18.721027374267578, 5.292552947998047, 12.161590576171875, -6.7256011962890625, 6.9317779541015625, -7.397726058959961, 27.2821044921875, 2.5830726623535156, 11.239086151123047, 35.213348388671875, 24.970367431640625, 4.449886322021484, -5.9906158447265625, 8.682634353637695, 5.773017883300781, 15.738998413085938, 4.994222640991211, 8.302780151367188, 20.35919189453125, 16.89288330078125, 17.649864196777344, 35.91869354248047, 51.386627197265625, 56.531463623046875, 15.306610107421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000075.npy"}
|
|
{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 12.300118446350098, "std": 14.21257495880127, "min": -36.66453552246094, "p10": -1.383794975280761, "median": 11.706378936767578, "p90": 27.822554016113283, "max": 63.71696472167969, "pos_frac": 0.859375, "sample": [13.112520217895508, 7.394985198974609, 2.150632858276367, 22.850723266601562, 2.3068161010742188, 17.330001831054688, 12.478790283203125, 24.298385620117188, 27.521358489990234, 12.872230529785156, 13.495002746582031, 19.686386108398438, 9.549509048461914, 30.28864288330078, 4.335807800292969, 4.928411483764648, 8.786613464355469, 63.71696472167969, 30.075538635253906, 4.994621276855469, -3.565032958984375, 11.602428436279297, 0.22000694274902344, 11.021705627441406, -13.193069458007812, 7.492761611938477, 5.117237091064453, 12.814323425292969, 5.8996429443359375, 7.618915557861328, 27.94879150390625, 17.871246337890625, 20.952316284179688, 8.970657348632812, 26.285316467285156, 4.061857223510742, -1.7114181518554688, 23.459930419921875, 3.892641067504883, -8.245742797851562, 6.947198867797852, 0.8205432891845703, -5.680910110473633, -3.6485595703125, 15.4547119140625, 26.654281616210938, 46.539947509765625, 27.527999877929688, 7.867168426513672, 4.650054931640625, 11.598464965820312, -0.2026653289794922, 14.948600769042969, 14.406776428222656, 14.726554870605469, 11.81032943725586, -0.6193408966064453, 13.128044128417969, 32.79363250732422, 20.805137634277344, 21.01800537109375, 29.22418212890625, 12.413490295410156, -36.66453552246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000076.npy"}
|
|
{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 19.283706665039062, "std": 19.269989013671875, "min": -32.8404541015625, "p10": 0.5663272857666017, "median": 19.556962966918945, "p90": 48.041534423828125, "max": 65.03570556640625, "pos_frac": 0.90625, "sample": [14.975730895996094, 42.08271026611328, 28.854778289794922, 36.850040435791016, 37.592323303222656, 0.7982177734375, 55.42301940917969, 20.306507110595703, 9.06045150756836, 9.619693756103516, 4.944143295288086, 32.40193176269531, 34.03478240966797, 5.551456451416016, 6.134368896484375, 20.80101776123047, -32.8404541015625, 19.28363800048828, 30.220489501953125, 29.683578491210938, 13.637863159179688, -2.5330772399902344, 8.448951721191406, 0.4669456481933594, 2.1930694580078125, 47.57807922363281, 2.631063461303711, 23.37220001220703, 9.830230712890625, -5.086721420288086, 29.908344268798828, 25.285202026367188, 12.756546020507812, 58.10008239746094, 32.01152801513672, 9.216190338134766, 50.851776123046875, 65.03570556640625, 18.111167907714844, 9.826217651367188, -27.659698486328125, 7.202613830566406, -10.1929931640625, 3.9591598510742188, 11.551939010620117, 25.45903778076172, 30.205467224121094, 24.559194564819336, 24.31696319580078, 25.302284240722656, 7.127635955810547, -0.8816070556640625, 4.89415168762207, 23.324661254882812, 25.179115295410156, 22.498687744140625, 3.1392974853515625, 49.98506164550781, 60.24290466308594, 9.879840850830078, 21.659683227539062, 48.24015808105469, 19.83028793334961, 6.91363525390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000077.npy"}
|
|
{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 14.692001342773438, "std": 15.56376838684082, "min": -15.874916076660156, "p10": -1.396270370483398, "median": 14.677362442016602, "p90": 36.72415313720704, "max": 67.34228515625, "pos_frac": 0.84375, "sample": [6.1219940185546875, 1.1440448760986328, 34.50030517578125, 12.955177307128906, -0.5603256225585938, 40.20988464355469, 15.444683074951172, 67.34228515625, 24.401641845703125, 20.14065170288086, 19.71578598022461, 7.671985626220703, -4.4231109619140625, 14.826595306396484, 20.72952651977539, 19.851215362548828, -0.5352325439453125, 7.6136474609375, -4.9853515625, 1.1480636596679688, 6.659219741821289, 15.310773849487305, 23.28799819946289, 17.285202026367188, 17.93375015258789, 14.919116973876953, 16.22180938720703, 13.1893310546875, 18.360809326171875, 5.507299423217773, 37.67723083496094, 14.857770919799805, 5.8601837158203125, -6.691707611083984, 23.044174194335938, -6.493499755859375, 44.475128173828125, 19.527454376220703, 38.492950439453125, 2.0633392333984375, 34.47309494018555, 3.7234039306640625, 45.162742614746094, 30.001415252685547, 23.929290771484375, 1.158477783203125, 10.942123413085938, 3.3632545471191406, 12.42623519897461, 8.008514404296875, 4.947898864746094, -1.5539207458496094, 14.528129577636719, -1.0284194946289062, 26.123779296875, -15.874916076660156, 45.27430725097656, 4.534717559814453, -13.426681518554688, 25.601333618164062, 3.1877288818359375, 25.876968383789062, 1.766448974609375, 22.340438842773438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000078.npy"}
|
|
{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 16.086679458618164, "std": 19.04593276977539, "min": -12.59344482421875, "p10": -4.03254280090332, "median": 12.223844528198242, "p90": 44.80573196411135, "max": 64.92160034179688, "pos_frac": 0.734375, "sample": [-2.5362548828125, 47.49634552001953, 5.276811599731445, 59.03450012207031, 7.001670837402344, -0.5777320861816406, -1.1064434051513672, -7.0970611572265625, 38.52763366699219, 34.615264892578125, 17.264278411865234, -12.59344482421875, 22.69952392578125, 58.497802734375, 10.390975952148438, 23.064498901367188, 64.92160034179688, 47.975311279296875, 16.183635711669922, -4.4481048583984375, 62.400787353515625, 16.446205139160156, -2.1428451538085938, -8.707275390625, 23.516921997070312, 1.007406234741211, 2.3822860717773438, 14.715152740478516, 12.60708999633789, 51.252830505371094, 4.6275634765625, 8.386825561523438, 11.840599060058594, 26.887435913085938, -7.03021240234375, 8.83145523071289, -2.0174026489257812, 7.405693054199219, 37.21009826660156, 33.718231201171875, 16.424076080322266, 27.777610778808594, 29.400634765625, 17.72394561767578, 24.344131469726562, -3.7071895599365234, 2.457000732421875, 24.156204223632812, 31.64733123779297, 9.835512161254883, 12.909244537353516, -4.171979904174805, -1.0238075256347656, 33.44230651855469, -1.7798843383789062, 18.089202880859375, 5.670619964599609, 5.127799987792969, -0.14351654052734375, -4.734962463378906, 8.256317138671875, 36.22607421875, -3.508495330810547, 17.199661254882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000079.npy"}
|
|
{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 14.238598823547363, "std": 18.56859016418457, "min": -16.204490661621094, "p10": -4.133474922180175, "median": 10.772218704223633, "p90": 34.709458923339845, "max": 71.16537475585938, "pos_frac": 0.765625, "sample": [26.95716094970703, 6.690452575683594, -2.4812469482421875, 22.397010803222656, 4.619140625, 2.412670135498047, -1.0446357727050781, -4.8076934814453125, 18.412002563476562, 9.915702819824219, 3.044179916381836, 14.40936279296875, -13.344783782958984, -16.204490661621094, -1.3826904296875, 20.144790649414062, 70.63186645507812, 29.267745971679688, 71.16537475585938, 41.871131896972656, 17.841079711914062, 6.2331390380859375, 35.51287841796875, 34.540077209472656, 30.500015258789062, 13.172300338745117, -0.92242431640625, 23.716705322265625, 11.25967025756836, 14.4317626953125, 1.2703857421875, -12.918907165527344, 6.307460784912109, -3.45281982421875, 21.7620849609375, -8.139076232910156, 51.91807556152344, 26.585922241210938, -6.081607818603516, 67.71099853515625, 9.033803939819336, 3.05615234375, 3.447460174560547, -0.1472930908203125, 34.78205108642578, 8.913703918457031, 20.333656311035156, 22.546890258789062, 13.782173156738281, 10.284767150878906, -0.7672004699707031, -2.8577327728271484, 0.02239227294921875, 3.3057708740234375, 21.29231071472168, 18.907730102539062, 5.1795501708984375, 4.917510986328125, 27.400596618652344, 20.375755310058594, 15.310523986816406, 29.299224853515625, -4.42518424987793, 13.354934692382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000080.npy"}
|
|
{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 18.687902450561523, "std": 20.691730499267578, "min": -16.264312744140625, "p10": -5.703773498535154, "median": 15.951116561889648, "p90": 43.301515197753915, "max": 88.83258056640625, "pos_frac": 0.875, "sample": [31.316490173339844, 23.6151123046875, -16.264312744140625, 33.62360382080078, 18.977157592773438, 41.57850646972656, 15.451717376708984, -8.434085845947266, 6.25013542175293, 27.11457061767578, 23.101730346679688, -9.297210693359375, 1.1081008911132812, 18.172523498535156, 63.25433349609375, 5.304372787475586, 16.450515747070312, 24.339752197265625, 44.039947509765625, -9.465221405029297, 0.7310943603515625, 12.10577392578125, 18.793991088867188, 54.39775085449219, 21.005752563476562, 11.062076568603516, 11.808197021484375, 5.7859344482421875, 3.890012741088867, -3.1344223022460938, 19.32270050048828, 9.556472778320312, 4.090494155883789, 10.194206237792969, 28.08527374267578, 84.96875, 33.16909408569336, 13.635040283203125, 2.6289443969726562, 44.87347412109375, 30.256561279296875, 36.527740478515625, 11.168708801269531, 2.0128746032714844, 3.7940216064453125, 0.5740776062011719, -12.17831802368164, 11.887655258178711, 21.108436584472656, 21.274368286132812, 13.792552947998047, 14.44520378112793, 18.52666473388672, -8.632564544677734, 35.16307830810547, 38.390350341796875, 88.83258056640625, 50.42301940917969, 7.00482177734375, 30.432785034179688, 4.73779296875, 21.54589080810547, -6.804924011230469, 24.534027099609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000081.npy"}
|
|
{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 15.053365707397461, "std": 21.99736785888672, "min": -39.86485290527344, "p10": -7.459218215942382, "median": 13.338205337524414, "p90": 38.668991851806645, "max": 89.83114624023438, "pos_frac": 0.8125, "sample": [-0.8921318054199219, 24.90479278564453, -9.642158508300781, 16.534549713134766, 18.590438842773438, 16.240449905395508, -16.53729248046875, 12.032608032226562, 19.794540405273438, 49.01897430419922, 51.593658447265625, 11.659292221069336, -7.926296234130859, 5.2493896484375, 11.982397079467773, 80.78681945800781, 9.693473815917969, -39.86485290527344, -6.3693695068359375, 24.810272216796875, 1.8538055419921875, 29.055191040039062, 38.934730529785156, 3.4213085174560547, -14.368026733398438, -2.95068359375, 3.65545654296875, 16.474815368652344, 16.138656616210938, 10.006103515625, 13.414283752441406, 28.52142333984375, 25.285873413085938, -0.6442661285400391, 54.90034484863281, 15.647525787353516, 38.04893493652344, 13.262126922607422, 8.479719161987305, 0.7695770263671875, -0.12590789794921875, 24.083297729492188, 45.55958557128906, -9.736717224121094, 24.84740447998047, 15.992012023925781, 6.229948043823242, 37.35728073120117, 22.730979919433594, 2.8004302978515625, 6.253288269042969, 5.970703125, -39.73681640625, 4.415290832519531, 25.248138427734375, 18.38865852355957, 6.740287780761719, 21.672622680664062, 16.347618103027344, 89.83114624023438, 11.025245666503906, 17.714859008789062, 0.5456314086914062, 37.693931579589844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000082.npy"}
|
|
{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 16.844711303710938, "std": 18.570791244506836, "min": -19.188583374023438, "p10": -3.3879829406738273, "median": 14.35002326965332, "p90": 42.792594909667976, "max": 65.8651123046875, "pos_frac": 0.8125, "sample": [14.228935241699219, 16.789478302001953, 12.133663177490234, 53.36094665527344, 34.27558898925781, 10.297088623046875, -2.7680435180664062, 39.1875, 41.57659912109375, 7.757587432861328, 4.2699432373046875, 14.789369583129883, 7.485626220703125, -1.2612075805664062, 15.084007263183594, 54.727142333984375, -9.445632934570312, 65.8651123046875, 7.3061981201171875, 52.751060485839844, 29.787628173828125, 9.24020767211914, 43.31373596191406, -10.929603576660156, -19.188583374023438, -3.6536712646484375, 41.0208740234375, -11.821823120117188, 16.398191452026367, 48.54240417480469, 15.413093566894531, 11.0830078125, 3.708658218383789, 3.528839111328125, 15.507072448730469, 10.505241394042969, 23.45583724975586, -0.712493896484375, 27.057273864746094, 6.479499816894531, 15.401229858398438, 13.266246795654297, 32.224578857421875, 54.044342041015625, 17.454505920410156, 35.657623291015625, 5.788871765136719, -12.741912841796875, -1.139547348022461, -0.31975555419921875, -4.206878662109375, 30.427841186523438, 7.9567413330078125, 31.713729858398438, 18.65715789794922, 19.17778778076172, 0.08737945556640625, 6.411815643310547, 14.471111297607422, 25.682571411132812, 32.82843780517578, 12.391891479492188, 21.611732482910156, 4.067676544189453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000083.npy"}
|
|
{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 15.18610954284668, "std": 14.150245666503906, "min": -27.389175415039062, "p10": -0.18269309997558583, "median": 14.087615013122559, "p90": 32.353108215332036, "max": 60.357635498046875, "pos_frac": 0.875, "sample": [7.093101501464844, 10.415725708007812, 4.1419219970703125, 2.9713878631591797, 27.3028564453125, 4.3968658447265625, 3.88330078125, 16.922317504882812, 9.736272811889648, 11.000450134277344, 24.993335723876953, 23.58599090576172, -9.10980224609375, 14.492935180664062, 21.149600982666016, 27.72998809814453, 7.950168609619141, 60.357635498046875, 23.547061920166016, 1.4079132080078125, 6.577480316162109, 24.46075439453125, 11.95166015625, 5.520816802978516, -1.1441459655761719, 32.86268615722656, 19.045166015625, 31.164093017578125, -27.389175415039062, -0.5108070373535156, 41.734962463378906, 4.289390563964844, 29.36988067626953, 13.671913146972656, 12.935951232910156, 1.2694854736328125, 34.204627990722656, 28.292068481445312, 20.318161010742188, 19.072296142578125, 4.519866943359375, 25.123489379882812, -0.22431182861328125, 13.682294845581055, 23.724578857421875, 22.360977172851562, 0.1424846649169922, -0.3142433166503906, 23.478973388671875, 24.291412353515625, 4.7633209228515625, 36.59473419189453, 38.16612243652344, 18.3443603515625, 36.637359619140625, 15.84375, 24.07398223876953, 4.613868713378906, -4.8692169189453125, 24.735939025878906, 8.989021301269531, 15.118659973144531, 10.53285026550293, -0.08558273315429688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000084.npy"}
|
|
{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 14.56472110748291, "std": 16.396625518798828, "min": -14.452140808105469, "p10": -1.9663673400878905, "median": 12.202058792114258, "p90": 38.04382858276367, "max": 54.83708190917969, "pos_frac": 0.78125, "sample": [30.60375213623047, 9.435478210449219, 23.242576599121094, 2.2814292907714844, 5.345367431640625, 18.244220733642578, 14.117645263671875, 2.4811363220214844, 45.790626525878906, 5.465232849121094, -2.461820602416992, 7.5709991455078125, 41.99211120605469, 16.209686279296875, 15.0228271484375, 15.011260986328125, -1.092529296875, -9.011070251464844, 15.010299682617188, -2.4535293579101562, -14.452140808105469, 49.571807861328125, 11.368270874023438, -1.1961174011230469, 2.389312744140625, 31.77978515625, 10.253318786621094, 1.5613861083984375, 12.471580505371094, 7.5074310302734375, 32.8780517578125, 0.77581787109375, -5.2002105712890625, -2.0629806518554688, 10.161094665527344, 4.294881820678711, 49.8975830078125, 14.61907958984375, 25.311363220214844, 1.2049312591552734, -0.8710308074951172, 54.83708190917969, -1.0660934448242188, 28.59027099609375, 36.18363952636719, 11.932537078857422, 36.89421081542969, 36.02351379394531, 5.111328125, 3.4818248748779297, 18.661109924316406, 38.536521911621094, -1.0496826171875, -1.740936279296875, 12.570884704589844, 13.553606033325195, 41.28498840332031, 12.623531341552734, 27.18138885498047, 27.469524383544922, -11.741104125976562, 15.201835632324219, -0.1963825225830078, 32.72962951660156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000085.npy"}
|
|
{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 15.120819091796875, "std": 22.13315773010254, "min": -23.043365478515625, "p10": -5.099515914916992, "median": 9.609776496887207, "p90": 40.32509613037111, "max": 90.91937255859375, "pos_frac": 0.796875, "sample": [-5.258033752441406, 11.207061767578125, 41.907318115234375, -8.560562133789062, 28.549501419067383, 16.710540771484375, 20.060571670532227, -2.612396240234375, 59.299713134765625, 5.969245910644531, 3.832111358642578, 3.6467437744140625, 3.2959365844726562, 13.168380737304688, -2.7030410766601562, 14.798652648925781, 33.773590087890625, 16.749908447265625, -0.3207969665527344, -20.642791748046875, 30.894073486328125, 10.01742935180664, 22.059925079345703, 9.202123641967773, 7.111366271972656, -11.196430206298828, 4.332481384277344, 48.000885009765625, -4.729640960693359, 3.4249725341796875, 25.82590103149414, 16.185630798339844, 4.8718719482421875, -23.043365478515625, 29.609039306640625, 36.790008544921875, 15.979362487792969, -15.525703430175781, 41.84013366699219, 4.738611221313477, 78.35061645507812, 16.390155792236328, 19.766708374023438, 7.220600128173828, -0.36234474182128906, 90.91937255859375, 5.911809921264648, 78.31182861328125, 28.94464111328125, 5.20330810546875, -19.79114532470703, 30.72002410888672, 8.438587188720703, 27.638954162597656, 17.891136169433594, 27.583209991455078, 29.74013328552246, 1.4536590576171875, 5.84747314453125, 5.687618255615234, 0.60858154296875, -3.0295257568359375, 2.9356842041015625, 12.09103012084961], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000086.npy"}
|
|
{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 16.435949325561523, "std": 19.270051956176758, "min": -30.819366455078125, "p10": -0.8358196258544919, "median": 14.930912017822266, "p90": 40.137254333496095, "max": 86.22776794433594, "pos_frac": 0.828125, "sample": [17.28917121887207, 21.421669006347656, 0.08962821960449219, 7.425071716308594, 20.080963134765625, 5.65057373046875, 15.107559204101562, -0.3676910400390625, 1.3020095825195312, 1.9622650146484375, -0.5148162841796875, 20.940048217773438, -1.797332763671875, 39.876708984375, 18.604934692382812, 27.616744995117188, 37.04975891113281, 8.826604843139648, 63.616607666015625, 30.9383544921875, 14.827224731445312, 14.466156005859375, 5.340023040771484, 26.202880859375, 18.841217041015625, 1.4536190032958984, 20.004074096679688, 86.22776794433594, 14.573379516601562, -0.4044189453125, 12.459493637084961, 40.24891662597656, 42.9310302734375, 4.775871276855469, 39.6123046875, 16.667469024658203, 11.47779655456543, 24.329654693603516, 13.720954895019531, 32.80332946777344, -0.3285636901855469, 49.71327209472656, 19.64398956298828, 18.458663940429688, -30.819366455078125, 5.162042617797852, 24.664588928222656, -23.127229690551758, 25.94318389892578, 43.599700927734375, -4.542026519775391, -3.7278404235839844, 6.152750015258789, 15.034599304199219, 0.5009613037109375, 43.522735595703125, 24.443695068359375, 23.34814453125, -19.465492248535156, 13.500640869140625, 12.6976318359375, 1.2114620208740234, 31.6090087890625, -0.9733924865722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000087.npy"}
|
|
{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 12.606681823730469, "std": 17.17668342590332, "min": -24.130783081054688, "p10": -2.415430259704589, "median": 10.607963562011719, "p90": 33.84362182617188, "max": 67.9146728515625, "pos_frac": 0.828125, "sample": [10.804435729980469, 8.144256591796875, 46.1583251953125, 34.84881591796875, 24.638282775878906, 25.98822021484375, 1.0082378387451172, 39.44792175292969, 4.683753967285156, 8.727230072021484, 20.141921997070312, 4.51161003112793, 8.776443481445312, 23.441871643066406, -12.287246704101562, 20.239425659179688, -12.188568115234375, 15.358299255371094, -21.8966064453125, -19.968017578125, -1.875253677368164, 43.462371826171875, 20.991294860839844, -2.6469345092773438, 25.39508819580078, 3.3737239837646484, 18.039215087890625, -0.8442230224609375, 15.663736343383789, 20.51251220703125, 26.882225036621094, 5.011589050292969, -0.4113597869873047, 23.581199645996094, 0.9104843139648438, 54.10205078125, 16.309471130371094, 31.4981689453125, 2.5606231689453125, 67.9146728515625, 2.58221435546875, 20.732749938964844, 4.9457244873046875, 11.569101333618164, 10.7135009765625, 11.512008666992188, 4.932655334472656, 0.9863548278808594, 19.093788146972656, 41.99755859375, 19.87103271484375, 10.502426147460938, 3.452869415283203, 3.9595184326171875, -24.130783081054688, 0.3300933837890625, 6.443878173828125, -1.4608306884765625, 5.789159774780273, 12.77060317993164, 14.982309341430664, -13.190826416015625, 9.652450561523438, 27.780776977539062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000088.npy"}
|
|
{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 12.958422660827637, "std": 14.058304786682129, "min": -24.18975830078125, "p10": -1.1230653762817375, "median": 11.840089797973633, "p90": 30.348101806640646, "max": 63.39625549316406, "pos_frac": 0.875, "sample": [1.540924072265625, 1.6628532409667969, 2.5439300537109375, 15.57675552368164, -2.9094581604003906, 24.58734130859375, 16.245445251464844, 13.348106384277344, -1.4676971435546875, 39.88214874267578, 33.02415466308594, 18.18294906616211, 13.433143615722656, 24.607650756835938, 15.181755065917969, 23.87420654296875, 10.674545288085938, 6.524559020996094, -1.415182113647461, 12.881080627441406, 8.496902465820312, 1.6050891876220703, 10.98507308959961, 7.031436920166016, -3.0071258544921875, 4.415729522705078, -0.44145965576171875, 5.918994903564453, 35.974334716796875, 10.955039978027344, 10.620674133300781, 49.396392822265625, 13.174304962158203, 32.659034729003906, 14.181533813476562, 4.4909515380859375, 15.573402404785156, 63.39625549316406, 0.9191131591796875, 1.1223316192626953, 18.195362091064453, 5.716953277587891, 12.908243179321289, 16.87960433959961, 14.69715690612793, 6.108737945556641, 9.727596282958984, 4.154590606689453, 5.347991943359375, 19.021629333496094, 12.695106506347656, 18.957626342773438, -1.5108757019042969, 8.222373962402344, 24.95592498779297, -24.18975830078125, -5.302886962890625, 15.979736328125, 13.44929313659668, 8.721565246582031, 18.9017333984375, 15.506080627441406, 51.40016174316406, 3.3479042053222656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000089.npy"}
|
|
{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 15.813643455505371, "std": 20.880434036254883, "min": -21.8197021484375, "p10": -1.0104305267333982, "median": 9.538124084472656, "p90": 46.21929016113282, "max": 73.5625, "pos_frac": 0.84375, "sample": [36.44163513183594, -20.36883544921875, 26.453041076660156, 46.468048095703125, 15.69339370727539, 24.802104949951172, 0.24790382385253906, -0.5848922729492188, 34.691497802734375, 0.0051021575927734375, 26.08673095703125, 9.332504272460938, 1.6024665832519531, 9.082077026367188, 17.048057556152344, 55.811622619628906, -0.7827644348144531, 72.75946044921875, 30.257522583007812, 3.0955657958984375, 2.265735626220703, 3.2484130859375, 7.8152618408203125, -1.108001708984375, 64.20916748046875, 14.732025146484375, 20.567642211914062, 63.97174072265625, 3.664257049560547, 16.615123748779297, 73.5625, 10.281425476074219, -1.9805450439453125, 4.252241134643555, 9.743743896484375, -21.8197021484375, 15.51058578491211, 5.602367401123047, 32.010162353515625, 3.9958724975585938, 6.132289886474609, 8.557487487792969, -10.734748840332031, -0.26720428466796875, 5.3317718505859375, 37.097381591796875, 45.63885498046875, -20.332115173339844, -6.3284149169921875, 7.355224609375, 14.057838439941406, 1.9130611419677734, 13.017744064331055, 16.70989227294922, 29.958648681640625, 18.016517639160156, 8.871562957763672, 4.500574111938477, 15.5308837890625, 4.550319671630859, 10.049797058105469, 7.338996887207031, 52.608642578125, 27.21595001220703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000090.npy"}
|
|
{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 15.881075859069824, "std": 17.729496002197266, "min": -11.3492431640625, "p10": -3.8981338500976555, "median": 12.374858856201172, "p90": 38.38378677368164, "max": 65.98548889160156, "pos_frac": 0.765625, "sample": [12.214889526367188, 34.579742431640625, -10.089263916015625, 4.440864562988281, 38.50262451171875, -0.39513397216796875, -0.15494918823242188, 29.861156463623047, 4.894611358642578, 12.534828186035156, 27.384124755859375, 22.345840454101562, 25.170318603515625, 10.912826538085938, -11.3492431640625, 65.98548889160156, -5.0235748291015625, 1.9336318969726562, 3.688934326171875, 46.01011657714844, 9.873039245605469, 19.973339080810547, 52.465087890625, -3.0345458984375, 21.4420166015625, 38.10649871826172, -3.1705551147460938, 0.3072948455810547, 12.063064575195312, -1.4928665161132812, 1.7366943359375, 14.138349533081055, 35.56327819824219, -7.58050537109375, 16.170196533203125, 11.76129150390625, 31.85973358154297, -4.59619140625, 14.539440155029297, -0.5540027618408203, 7.322776794433594, 31.792327880859375, 35.96019744873047, 3.2818145751953125, -4.209953308105469, 20.9501953125, 16.77490997314453, 45.79248046875, 30.341453552246094, 20.513099670410156, 16.407196044921875, -10.104118347167969, 0.9147891998291016, 9.194747924804688, -0.5399742126464844, 31.999004364013672, 0.8948116302490234, 49.46910095214844, 26.79067611694336, 9.725616455078125, 29.698745727539062, -2.0508956909179688, 24.076629638671875, 48.374717712402344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000091.npy"}
|
|
{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 14.722909927368164, "std": 17.639738082885742, "min": -21.77862548828125, "p10": -3.8795246124267573, "median": 11.375709533691406, "p90": 36.24598274230958, "max": 65.26766967773438, "pos_frac": 0.765625, "sample": [0.3945465087890625, 2.9945220947265625, 11.119316101074219, 26.443496704101562, -8.323509216308594, 3.9676513671875, -21.77862548828125, -6.84632682800293, 22.20172119140625, 13.510833740234375, 36.948909759521484, 1.3691844940185547, 23.734298706054688, 1.3235015869140625, 3.0106887817382812, 61.62109375, 33.864051818847656, 65.26766967773438, -7.23150634765625, -4.769195556640625, 14.970943450927734, -5.651214599609375, 21.215469360351562, -2.6243629455566406, -1.8459854125976562, 26.467987060546875, -3.125720977783203, 42.67535400390625, 21.217750549316406, 7.899442672729492, 48.873390197753906, -1.1311225891113281, 22.99340057373047, 2.860300064086914, 33.0517578125, 2.811840057373047, 11.632102966308594, 50.64640808105469, 7.044136047363281, 30.729034423828125, 25.996910095214844, 34.60581970214844, -0.8587799072265625, 2.645050048828125, 3.0857772827148438, 10.877506256103516, -1.6697845458984375, 19.675399780273438, 24.23155975341797, 5.077659606933594, -1.1303634643554688, -4.202583312988281, 38.17678451538086, 8.446725845336914, 12.854913711547852, 22.65093231201172, 24.663223266601562, 22.084373474121094, 27.08123016357422, 13.86102294921875, 10.855119705200195, -1.0185317993164062, 34.256614685058594, 20.486427307128906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000092.npy"}
|
|
{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 14.930685043334961, "std": 15.610140800476074, "min": -17.757850646972656, "p10": -0.5473608016967771, "median": 11.557178497314453, "p90": 35.19791870117187, "max": 82.59530639648438, "pos_frac": 0.875, "sample": [18.303550720214844, 5.516513824462891, 13.937423706054688, 38.17558288574219, 39.643699645996094, 21.9937744140625, 5.1121063232421875, 9.938152313232422, 22.41558074951172, 0.10602569580078125, 38.53736877441406, 4.115516662597656, 28.61870574951172, 31.499380111694336, 82.59530639648438, 6.498912811279297, 20.43523406982422, 5.411369323730469, 8.531455993652344, 47.30077362060547, -3.975250244140625, 10.227109909057617, 15.096546173095703, -0.64794921875, 16.745864868164062, 12.326438903808594, 12.202476501464844, 6.396232604980469, -2.636140823364258, 11.594047546386719, -0.3126544952392578, 11.1275634765625, 5.999292373657227, 17.80957794189453, 23.150562286376953, 25.138107299804688, 35.15734100341797, -2.4987030029296875, -2.2795867919921875, 27.79339599609375, 21.56116485595703, 26.93730354309082, 7.691810607910156, 2.7812061309814453, 7.286724090576172, 5.158599853515625, 12.120670318603516, 2.0564002990722656, 37.14704132080078, 6.737043380737305, 2.8157577514648438, 35.215309143066406, 15.233978271484375, 6.799613952636719, 4.585784912109375, 5.116645812988281, -5.678962707519531, 29.087547302246094, 26.157665252685547, 23.68750762939453, 19.635940551757812, 11.520309448242188, -17.757850646972656, 2.5658817291259766], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000093.npy"}
|
|
{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 15.727283477783203, "std": 14.842207908630371, "min": -5.794677734375, "p10": -1.7529954910278307, "median": 13.65335750579834, "p90": 38.72511558532715, "max": 55.517303466796875, "pos_frac": 0.859375, "sample": [14.275054931640625, -2.273059844970703, 27.487930297851562, 9.202919006347656, 7.0263671875, -4.113471984863281, 4.134151458740234, 38.2380256652832, 4.0387725830078125, -5.794677734375, 18.964418411254883, 10.399093627929688, 14.001121520996094, 2.2972869873046875, 5.895748138427734, 0.7536869049072266, 31.852874755859375, 7.5572662353515625, -2.613903045654297, 55.517303466796875, 5.046943664550781, 7.956912994384766, 16.607498168945312, 29.93370819091797, 39.197601318359375, 15.314605712890625, 11.537811279296875, 17.00798797607422, 12.246501922607422, 0.48760223388671875, 4.402517318725586, 13.328603744506836, 20.74138832092285, 25.493560791015625, 24.97454833984375, 12.498104095458984, 21.09503173828125, 33.355201721191406, 24.58855438232422, 17.490434646606445, 2.1936588287353516, 27.220481872558594, 6.67755126953125, -2.25445556640625, 38.933868408203125, 41.91438293457031, -0.5829219818115234, 42.095916748046875, 42.0830078125, -0.182373046875, -3.5108184814453125, 54.89991760253906, 13.978111267089844, 24.840286254882812, 0.4257659912109375, 8.136260986328125, -3.85064697265625, 2.5704193115234375, 24.278770446777344, 4.4713592529296875, 21.728412628173828, 25.019943237304688, 25.6246337890625, 19.682586669921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000094.npy"}
|
|
{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 12.927989959716797, "std": 12.508003234863281, "min": -21.892471313476562, "p10": 0.15100936889648472, "median": 12.399640083312988, "p90": 33.087457275390626, "max": 46.16912078857422, "pos_frac": 0.90625, "sample": [4.8342742919921875, 19.147682189941406, 14.460556030273438, 13.357398986816406, 11.72856330871582, 35.9486083984375, 8.001461029052734, 3.761608123779297, -0.9729824066162109, 15.043556213378906, 15.442827224731445, 3.0558319091796875, 15.441410064697266, 38.38035583496094, 33.263458251953125, 8.73086929321289, 18.070640563964844, 5.998106002807617, 5.851285934448242, 13.989822387695312, 5.46112060546875, 16.72924041748047, 13.88916015625, 10.04005241394043, 7.005348205566406, 0.8432769775390625, 3.947784423828125, 22.677696228027344, 12.383367538452148, 17.59780502319336, -0.8344459533691406, 3.87261962890625, 35.68865966796875, 38.53306579589844, 46.16912078857422, 29.040931701660156, 12.939674377441406, 2.3528785705566406, 2.21087646484375, 25.00921630859375, 24.024059295654297, 32.676788330078125, 38.313568115234375, 3.989288330078125, 13.711309432983398, 8.59488296508789, 0.4865684509277344, 18.322256088256836, 6.597099304199219, -21.892471313476562, -0.24858665466308594, 1.425008773803711, -4.5596923828125, 16.34918212890625, 24.23645782470703, 12.415912628173828, 19.79443359375, 9.667329788208008, 11.638790130615234, 12.908248901367188, 0.007198333740234375, 1.7812328338623047, 12.955854415893555, -4.896110534667969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000095.npy"}
|
|
{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 17.10177230834961, "std": 17.06407928466797, "min": -21.061553955078125, "p10": 0.6136047363281251, "median": 15.579402923583984, "p90": 38.818279266357436, "max": 79.47856140136719, "pos_frac": 0.921875, "sample": [25.507675170898438, 18.597156524658203, 18.766311645507812, 13.56161880493164, 24.514892578125, 30.657516479492188, 11.212326049804688, 0.9471111297607422, 0.175506591796875, 25.793792724609375, 56.52677917480469, 7.33477783203125, 15.459770202636719, 2.5499534606933594, -2.434459686279297, 16.16119384765625, 19.234275817871094, 28.829025268554688, 51.900550842285156, 43.121246337890625, 9.666719436645508, 20.13086700439453, 15.69903564453125, 79.47856140136719, 35.753883361816406, 13.106914520263672, 20.813980102539062, 17.917510986328125, 13.048675537109375, 11.296524047851562, 5.599395751953125, 56.166717529296875, 19.889020919799805, 15.732078552246094, 2.8985671997070312, 40.131591796875, 23.449752807617188, 3.9746780395507812, 21.571060180664062, 2.9327144622802734, 22.86284637451172, 21.54877281188965, 8.190292358398438, 0.580078125, 7.512470245361328, -0.06471824645996094, 10.530784606933594, 19.015869140625, 0.69183349609375, 8.545997619628906, 9.504058837890625, 22.966712951660156, -5.033271789550781, -2.6547164916992188, 25.64581298828125, 1.7023773193359375, 9.371843338012695, 22.149063110351562, 58.62528991699219, 14.236328125, 1.2128677368164062, 16.47614288330078, -21.061553955078125, 4.283073425292969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000096.npy"}
|
|
{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 13.75833511352539, "std": 12.895035743713379, "min": -17.874099731445312, "p10": -0.5141550064086912, "median": 13.000584602355957, "p90": 29.66490249633789, "max": 53.748199462890625, "pos_frac": 0.859375, "sample": [9.710060119628906, 29.2288818359375, 15.99896240234375, 9.632415771484375, -0.22753524780273438, 6.137199401855469, 13.366260528564453, 16.052425384521484, -0.2671680450439453, 15.660682678222656, 10.858783721923828, 19.54505157470703, 0.16001510620117188, 12.489765167236328, 47.30116271972656, 22.842117309570312, 0.8961639404296875, 6.0096893310546875, 1.0892791748046875, 8.066247940063477, 15.095695495605469, 5.430744171142578, 9.523662567138672, 3.1602935791015625, 38.843414306640625, 15.411327362060547, 15.495277404785156, 16.13751220703125, 24.413589477539062, 10.208541870117188, 13.988014221191406, -0.6200065612792969, 7.3450164794921875, -1.3163928985595703, 18.775840759277344, 13.408111572265625, 30.06719207763672, 16.836055755615234, -17.874099731445312, -1.2127418518066406, 29.851768493652344, 24.178340911865234, -2.7813491821289062, 2.660552978515625, 28.21478271484375, 13.943599700927734, 2.5015640258789062, 13.794536590576172, 12.634908676147461, 8.733543395996094, -0.6881656646728516, 25.808120727539062, -4.034685134887695, 28.511028289794922, 11.53973388671875, 28.08526611328125, 4.058189392089844, 35.98405456542969, 23.54632568359375, 32.15476989746094, 20.902843475341797, 1.4298439025878906, 53.748199462890625, 8.088134765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000097.npy"}
|
|
{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 14.806070327758789, "std": 15.301271438598633, "min": -33.107086181640625, "p10": -3.473175048828125, "median": 13.046260833740234, "p90": 35.41066970825195, "max": 46.804412841796875, "pos_frac": 0.828125, "sample": [10.250022888183594, 7.064455032348633, 40.66325378417969, 17.2257080078125, 3.035236358642578, 12.547853469848633, 10.164047241210938, 13.798370361328125, 11.717605590820312, 3.1187973022460938, 16.921159744262695, 10.0115966796875, 13.232696533203125, 11.335319519042969, 34.71672058105469, 28.359024047851562, -6.075168609619141, 8.275115966796875, 6.09765625, -9.021469116210938, 25.340702056884766, 21.178237915039062, -4.229736328125, 1.1066131591796875, 35.539710998535156, 26.843509674072266, 21.671218872070312, 36.006752014160156, 30.397018432617188, 15.433212280273438, 14.638015747070312, 3.124134063720703, 33.17185974121094, 40.83636474609375, -33.107086181640625, 38.08454895019531, 35.10957336425781, 15.792999267578125, 18.728591918945312, 8.769599914550781, -3.483325958251953, 23.04351806640625, -4.7819671630859375, 12.859825134277344, 17.92974853515625, 3.6186771392822266, 25.297393798828125, 45.22088623046875, 31.527076721191406, 21.758548736572266, 5.780303955078125, -0.5218505859375, 25.23223876953125, 46.804412841796875, 21.549850463867188, 11.003910064697266, 6.3940277099609375, 4.154226303100586, -3.4494895935058594, -0.7055397033691406, 33.58892059326172, -0.9688034057617188, 3.482421875, -5.620391845703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000098.npy"}
|
|
{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 16.02899932861328, "std": 16.38023567199707, "min": -6.7105560302734375, "p10": 1.4654903411865237, "median": 10.561936378479004, "p90": 34.031663513183595, "max": 75.30038452148438, "pos_frac": 0.9375, "sample": [2.8040637969970703, -0.146148681640625, 2.811126708984375, 12.10787582397461, 2.8706398010253906, 25.13916778564453, 7.778697967529297, 9.554145812988281, 48.448089599609375, 10.74030876159668, 8.559059143066406, 48.04338073730469, 16.516551971435547, -6.7105560302734375, 29.37933349609375, 10.362419128417969, 5.348503112792969, 21.236236572265625, 1.3019752502441406, 6.237823486328125, 23.65362548828125, 6.139865875244141, 3.940196990966797, 75.30038452148438, 2.3192520141601562, 21.931716918945312, 6.268730163574219, -2.9442214965820312, 8.600093841552734, -6.6666412353515625, 3.1980133056640625, 31.433815002441406, 19.967483520507812, 14.146934509277344, 23.592750549316406, 63.40672302246094, 34.515533447265625, 10.383563995361328, 15.137153625488281, 8.672981262207031, 1.3592147827148438, 13.580879211425781, 15.684371948242188, 26.22060775756836, 56.511573791503906, 3.1767807006835938, 10.102519989013672, 31.54784393310547, 0.30741119384765625, 3.476024627685547, 17.696182250976562, 32.90263366699219, 11.969558715820312, 6.532634735107422, 24.22882080078125, 4.663116455078125, 20.112125396728516, 10.289571762084961, 29.681365966796875, 38.87992858886719, 14.44032096862793, 23.533599853515625, 1.7134666442871094, 1.8647689819335938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000099.npy"}
|
|
{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 11.581656455993652, "std": 15.50505256652832, "min": -23.169559478759766, "p10": -2.857125663757324, "median": 8.712264060974121, "p90": 34.95568962097168, "max": 66.69366455078125, "pos_frac": 0.8125, "sample": [29.14939308166504, 19.03385353088379, -2.9991378784179688, 6.382438659667969, 9.552173614501953, 6.617439270019531, 19.364835739135742, 6.520191192626953, 8.706808090209961, 6.71234130859375, 7.290985107421875, 0.5030632019042969, 17.229934692382812, 9.468616485595703, 0.8705615997314453, 41.265899658203125, 10.477409362792969, -18.203109741210938, -0.1141510009765625, -11.286697387695312, -2.613046646118164, -1.4453163146972656, 2.503570556640625, -23.169559478759766, 34.503665924072266, 16.36373519897461, 27.89474868774414, 14.33807373046875, 0.460845947265625, 5.472326278686523, 21.600509643554688, 3.16717529296875, 24.279190063476562, -0.4066047668457031, 36.80354309082031, 16.400123596191406, 4.7257537841796875, 8.717720031738281, 25.979873657226562, 8.956344604492188, 21.182628631591797, 8.030891418457031, 8.424041748046875, 2.8177623748779297, 15.523628234863281, 19.39855194091797, 8.817832946777344, -9.518577575683594, -0.9626541137695312, 35.1494140625, 18.69426727294922, 37.077369689941406, 15.579540252685547, -18.634429931640625, 66.69366455078125, 1.1520767211914062, 8.442512512207031, 36.929779052734375, -2.96173095703125, 38.035736083984375, 13.325508117675781, 4.979747772216797, 15.67812728881836, 6.294788360595703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000100.npy"}
|
|
{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 13.298306465148926, "std": 16.0115966796875, "min": -13.437477111816406, "p10": -2.6544401168823226, "median": 11.504708290100098, "p90": 33.21231689453125, "max": 71.739501953125, "pos_frac": 0.796875, "sample": [32.69853210449219, 71.739501953125, 17.750381469726562, 15.777267456054688, 0.34046173095703125, 44.83594512939453, 49.26139831542969, 2.3334617614746094, 42.41645812988281, 51.12989807128906, -3.421173095703125, 11.400115966796875, -0.6746749877929688, 25.07335662841797, 6.761272430419922, 0.03362274169921875, 1.5172958374023438, 5.832422256469727, 19.44861602783203, 18.896392822265625, 1.1024703979492188, 10.095260620117188, 23.442710876464844, 21.192100524902344, 15.913848876953125, 11.428525924682617, 4.205787658691406, 33.43251037597656, 41.96624755859375, 10.259902954101562, 9.1107177734375, -3.3958301544189453, -6.917705535888672, 24.79449462890625, 12.85157585144043, 15.612075805664062, 25.042892456054688, 18.221710205078125, -0.924530029296875, 5.098918914794922, -0.236663818359375, -8.753665924072266, 11.580890655517578, 7.271247863769531, 6.904380798339844, -0.5351009368896484, 12.407234191894531, 15.078105926513672, 5.112945556640625, 12.202615737915039, -13.437477111816406, 24.22895050048828, 13.173538208007812, -6.6963958740234375, 16.656902313232422, 13.473901748657227, -0.3384056091308594, 19.685047149658203, 1.0326080322265625, -0.9129962921142578, 29.289276123046875, -9.167903900146484, 14.475421905517578, 2.9129180908203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000101.npy"}
|
|
{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 10.292984008789062, "std": 14.347973823547363, "min": -15.760269165039062, "p10": -6.197046661376951, "median": 10.288660049438477, "p90": 26.12673873901369, "max": 64.38186645507812, "pos_frac": 0.734375, "sample": [2.1675949096679688, 27.92315673828125, 15.278213500976562, 1.0778656005859375, 17.226165771484375, 2.87445068359375, -8.944412231445312, -15.760269165039062, 14.474687576293945, -12.362106323242188, 19.2967529296875, 32.12200927734375, 4.3315277099609375, 16.27684783935547, 12.590127944946289, -1.8763313293457031, 8.722618103027344, 17.316120147705078, 21.935096740722656, -0.4905281066894531, 9.32830810546875, -2.118246078491211, 12.638324737548828, -2.0390682220458984, 21.907379150390625, 12.941486358642578, 17.67669105529785, 64.38186645507812, -0.5782012939453125, 0.30353546142578125, 42.86677551269531, -0.07473373413085938, 21.171573638916016, 21.366052627563477, 2.2950305938720703, -3.7298126220703125, 9.618448257446289, -13.813644409179688, 3.6487274169921875, -8.175346374511719, 1.0453472137451172, -9.049057006835938, -7.105018615722656, 11.408660888671875, 20.055267333984375, 7.24560546875, 14.92919921875, 35.608428955078125, 19.454696655273438, 19.862152099609375, 31.891983032226562, -2.2348098754882812, 2.2524795532226562, 7.948158264160156, 30.79065704345703, 0.30731201171875, 18.486900329589844, 12.989234924316406, 10.958871841430664, 21.017471313476562, -3.48443603515625, -4.0784454345703125, 13.727210998535156, 20.928401947021484], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000102.npy"}
|
|
{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 17.9302921295166, "std": 15.200693130493164, "min": -8.713722229003906, "p10": 1.1360929489135747, "median": 15.167030334472656, "p90": 41.95016403198244, "max": 57.15484619140625, "pos_frac": 0.90625, "sample": [4.509670257568359, 5.749458312988281, 8.477048873901367, 9.560462951660156, 10.486953735351562, -3.767669677734375, 22.46859359741211, 19.963768005371094, 6.2135009765625, 14.144878387451172, 16.40701675415039, 16.991830825805664, 5.381782531738281, 18.01367950439453, 0.8968124389648438, 34.05841827392578, 51.5111083984375, 18.342308044433594, 23.57288360595703, 21.305625915527344, 6.3739776611328125, 11.59344482421875, 16.312515258789062, 23.30975341796875, -2.8183135986328125, 32.59423828125, 19.160663604736328, -5.328460693359375, 2.898834228515625, 15.209259033203125, 10.762443542480469, 50.92523193359375, 9.811210632324219, 21.998851776123047, 31.176536560058594, 16.599571228027344, -0.5408706665039062, -8.713722229003906, 11.636953353881836, 15.060211181640625, 1.7715606689453125, 15.124801635742188, 19.05658721923828, 13.280242919921875, 10.2037353515625, 54.093666076660156, 33.79335021972656, 18.211326599121094, 57.15484619140625, 43.78443908691406, 49.08361053466797, 32.63294982910156, 10.993728637695312, 13.507331848144531, 37.670188903808594, -1.2379016876220703, 22.013559341430664, 1.6944141387939453, 6.069915771484375, 29.74969482421875, 46.82648468017578, 13.948917388916016, 12.944290161132812, 22.8564395904541], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000103.npy"}
|
|
{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 16.904809951782227, "std": 17.527902603149414, "min": -16.956024169921875, "p10": -0.45309448242187444, "median": 12.21213150024414, "p90": 42.73717956542969, "max": 60.244659423828125, "pos_frac": 0.890625, "sample": [32.86827850341797, 5.07281494140625, 4.628778457641602, 3.2838516235351562, 27.109451293945312, 6.912315368652344, -6.484748840332031, 1.2586174011230469, 13.713653564453125, 17.695358276367188, -3.507608413696289, 10.720016479492188, 58.62034606933594, 27.295936584472656, 35.299903869628906, 1.1672592163085938, 43.280609130859375, 21.107942581176758, -3.8200149536132812, 49.171112060546875, 12.823272705078125, 13.268470764160156, 16.61417007446289, 5.3708343505859375, 7.58991813659668, 2.161905288696289, 60.244659423828125, -3.4186782836914062, 24.153045654296875, 55.11781311035156, 54.441253662109375, 14.961593627929688, 5.761177062988281, 27.068069458007812, 4.516529083251953, 3.9364051818847656, 0.5342636108398438, -3.4203948974609375, 38.47979736328125, 8.376815795898438, 29.52190399169922, 8.306076049804688, 30.470481872558594, 9.700325012207031, 47.803462982177734, 25.92057991027832, 19.28520965576172, 16.381668090820312, 8.766845703125, 22.90631103515625, -0.6943759918212891, 35.86793518066406, 6.0770111083984375, 11.187530517578125, 41.46917724609375, 34.974510192871094, 0.24753379821777344, 19.49934196472168, 0.10989570617675781, 15.21231460571289, 11.600990295410156, 2.3062095642089844, -16.956024169921875, 7.968076705932617], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000104.npy"}
|
|
{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 22.631938934326172, "std": 16.87553596496582, "min": -11.806066513061523, "p10": 3.2308113098144533, "median": 20.428775787353516, "p90": 44.92316894531251, "max": 83.43135070800781, "pos_frac": 0.984375, "sample": [18.841156005859375, 15.715576171875, 28.528160095214844, 18.2706298828125, 23.10448455810547, 19.689598083496094, 29.0499267578125, 53.79582214355469, 9.911565780639648, 2.675201416015625, 2.7847023010253906, 40.591522216796875, 28.321319580078125, 7.86311149597168, 24.6844482421875, 24.237720489501953, -11.806066513061523, 8.121910095214844, 3.355743408203125, 41.642356872558594, 17.590415954589844, 48.16423034667969, 52.10481262207031, 9.044212341308594, 10.20220947265625, 38.905738830566406, 29.94635009765625, 19.183250427246094, 3.842782974243164, 26.027671813964844, 22.110244750976562, 54.34149169921875, 7.326133728027344, 26.824989318847656, 9.032142639160156, 11.149940490722656, 27.973777770996094, 46.29473876953125, 21.167953491210938, 10.814022064208984, 29.46440887451172, 57.24153137207031, 18.031457901000977, 18.30949592590332, 6.2127685546875, 27.781509399414062, 1.4812507629394531, 33.6888427734375, 35.571834564208984, 32.30755615234375, 12.924758911132812, 14.987442016601562, 1.53662109375, 3.1772689819335938, 25.4005126953125, 24.347183227539062, 19.50482749938965, 41.72283935546875, 25.438339233398438, 83.43135070800781, 9.948932647705078, 0.33469581604003906, 7.841331481933594, 36.33116149902344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000105.npy"}
|
|
{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 14.780631065368652, "std": 14.420707702636719, "min": -11.831985473632812, "p10": -2.3717979431152334, "median": 14.406625747680664, "p90": 34.00704612731934, "max": 66.73580169677734, "pos_frac": 0.828125, "sample": [13.291328430175781, 1.7923412322998047, 2.3025341033935547, 1.380044937133789, 14.854162216186523, -1.155752182006836, 25.911788940429688, -2.8395614624023438, 18.546951293945312, 32.99382400512695, 23.047542572021484, 18.79132080078125, 4.069145202636719, 38.9862060546875, 10.747222900390625, 32.65647888183594, -0.44408416748046875, 9.797447204589844, 25.838043212890625, -0.20041847229003906, 35.50135803222656, 14.30881118774414, 3.8092193603515625, 34.4412841796875, 10.435953140258789, 17.55170440673828, 9.111351013183594, -6.6809539794921875, 18.50536346435547, -4.9837188720703125, 18.76694679260254, 13.510540008544922, 17.062274932861328, 22.37569808959961, 3.1444969177246094, 7.956169128417969, 13.472610473632812, -1.2803497314453125, 34.95074462890625, 38.39080047607422, 14.504440307617188, 15.667957305908203, -11.831985473632812, 0.8845596313476562, 66.73580169677734, 13.118309020996094, 17.869102478027344, 25.923805236816406, 4.76971435546875, 4.1929168701171875, 3.1536331176757812, 20.319198608398438, 44.884849548339844, 21.741788864135742, 25.8665771484375, 20.07220458984375, -3.0828781127929688, 16.070419311523438, 5.850555419921875, 19.646011352539062, 32.51692199707031, -3.6258621215820312, -3.2850799560546875, 23.280555725097656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000106.npy"}
|
|
{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 16.76299285888672, "std": 15.433250427246094, "min": -16.472091674804688, "p10": -1.0375350952148417, "median": 12.995878219604492, "p90": 38.77900199890138, "max": 55.06269836425781, "pos_frac": 0.890625, "sample": [27.363006591796875, 42.24884033203125, 21.733200073242188, 34.28498840332031, 12.23681640625, 12.79037094116211, 23.380401611328125, 1.0135345458984375, -16.472091674804688, 2.3941574096679688, 7.666009902954102, 33.76130676269531, -5.1421051025390625, 21.19406509399414, 6.958396911621094, 7.844413757324219, 7.674236297607422, 42.720436096191406, 3.523214340209961, -4.2885284423828125, 35.654823303222656, 30.023414611816406, 36.47918701171875, 2.027162551879883, -2.746063232421875, 3.2206153869628906, 8.370010375976562, 2.8293914794921875, 55.06269836425781, 25.928977966308594, 18.94915199279785, 13.201385498046875, 30.254112243652344, 15.18682861328125, 26.67047882080078, 46.27821350097656, 9.188159942626953, 12.094032287597656, 7.95982551574707, -8.155509948730469, 25.923446655273438, -8.874343872070312, 27.39563751220703, 39.7646369934082, 10.01910400390625, 40.30463409423828, 6.1192779541015625, 47.9573974609375, 27.112350463867188, 25.94676971435547, 1.136566162109375, 6.994424819946289, 16.7132568359375, -1.91656494140625, 11.066967010498047, 5.37432861328125, 6.781972885131836, 30.265228271484375, 15.995010375976562, 19.626712799072266, 23.193695068359375, 24.89276123046875, 6.9338531494140625, 12.742874145507812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000107.npy"}
|
|
{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 17.171451568603516, "std": 19.241872787475586, "min": -12.206710815429688, "p10": -2.9826507568359375, "median": 12.531774520874023, "p90": 47.814797973632814, "max": 62.72894287109375, "pos_frac": 0.78125, "sample": [19.371261596679688, 46.10026550292969, 20.122268676757812, -8.135040283203125, -9.7625732421875, 31.926361083984375, 16.902660369873047, 2.9941787719726562, 62.591888427734375, -2.7616043090820312, 8.842910766601562, 49.268531799316406, 2.2182388305664062, -0.5796356201171875, 1.5184669494628906, 22.089439392089844, -0.25026512145996094, 34.03733825683594, 52.93180847167969, 50.20030212402344, 51.22834777832031, 28.627777099609375, -1.6878700256347656, -7.18864631652832, 17.32206153869629, 6.523109436035156, 12.40292739868164, 9.786285400390625, 46.64715576171875, 7.2580413818359375, 6.177297592163086, -1.0799026489257812, 38.74525451660156, -3.0773849487304688, 62.72894287109375, 3.47711181640625, 32.325775146484375, -3.153209686279297, -0.0245513916015625, 3.7015609741210938, 17.23843002319336, 6.656982421875, 18.298389434814453, 48.315216064453125, 14.363212585449219, 1.7998638153076172, 33.755409240722656, 27.679580688476562, 9.665340423583984, -12.206710815429688, 3.454864501953125, 29.684083938598633, 40.21929931640625, 23.98577880859375, 3.2472763061523438, -1.6530590057373047, 25.9730224609375, 9.180757522583008, 35.817604064941406, -11.316875457763672, 17.465957641601562, 27.226932525634766, 12.660621643066406, 7.094024658203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000108.npy"}
|
|
{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 18.880970001220703, "std": 15.617107391357422, "min": -8.151317596435547, "p10": 1.3010925292968751, "median": 17.8643159866333, "p90": 37.82953033447266, "max": 59.952484130859375, "pos_frac": 0.90625, "sample": [24.149967193603516, 3.0792198181152344, 36.652801513671875, 30.550247192382812, 3.4107437133789062, 27.34780502319336, 1.3858795166015625, 11.391559600830078, 8.487373352050781, 25.87554168701172, 45.141845703125, 13.653133392333984, 27.683197021484375, 15.08209228515625, 30.156448364257812, 5.785343170166016, 15.70926284790039, 22.6801815032959, -1.9064140319824219, -2.3064041137695312, 1.2647552490234375, 59.952484130859375, -1.2209625244140625, -2.46441650390625, 18.40797996520996, 2.3456039428710938, 49.55741882324219, 17.98674964904785, 6.422788619995117, -8.151317596435547, 19.00157928466797, 28.88990020751953, 2.005685806274414, 17.74188232421875, 27.4512939453125, 18.66950035095215, 32.80653381347656, 37.96685028076172, 33.98902130126953, 44.06871795654297, 8.269424438476562, 5.50653076171875, 3.214813232421875, 12.835872650146484, 2.46875, 31.808799743652344, 26.909576416015625, 20.78204345703125, 12.07988166809082, 36.1240234375, -3.3714599609375, 16.62677001953125, 19.214496612548828, 42.32545471191406, 47.32215881347656, 7.459495544433594, 37.509117126464844, 13.128837585449219, 35.92096710205078, 37.195899963378906, 4.8481903076171875, 1.890472412109375, 6.656711578369141, 30.95336151123047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000109.npy"}
|
|
{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 17.782527923583984, "std": 18.577707290649414, "min": -23.8824462890625, "p10": 0.677023506164551, "median": 15.044184684753418, "p90": 45.19076461791994, "max": 75.23970031738281, "pos_frac": 0.921875, "sample": [29.693496704101562, 14.7794189453125, 18.403682708740234, 32.628570556640625, 24.36237335205078, 16.35778045654297, 18.70187759399414, 14.914789199829102, -23.526107788085938, 15.173580169677734, 21.744529724121094, 8.757179260253906, 2.1788787841796875, 8.957860946655273, 13.004692077636719, 0.8891792297363281, 13.2977294921875, 75.23970031738281, 46.80913543701172, 12.741813659667969, 2.352712631225586, 0.4381675720214844, 65.16487121582031, 47.083892822265625, 35.157386779785156, 55.9725341796875, 3.6762313842773438, 14.7689208984375, 4.621753692626953, 30.137802124023438, -5.757926940917969, -5.865413665771484, 3.750194549560547, 9.583755493164062, 4.178371429443359, 55.8890380859375, 21.45433807373047, 41.41456604003906, 29.751632690429688, 0.9749717712402344, 27.063636779785156, 1.859696388244629, 7.730461120605469, 24.654048919677734, 10.350528717041016, 21.91187286376953, 33.653297424316406, -23.8824462890625, 20.63768768310547, -4.2315216064453125, 11.522483825683594, 1.0471248626708984, 17.625892639160156, 12.02490234375, 27.1016845703125, 16.21619415283203, 15.214271545410156, 21.742752075195312, 8.15781021118164, 29.282264709472656, 19.910385131835938, 0.5860996246337891, 9.008552551269531, 49.03617858886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000110.npy"}
|
|
{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 18.308712005615234, "std": 18.513505935668945, "min": -27.56195068359375, "p10": -1.869922256469726, "median": 14.973943710327148, "p90": 42.6937454223633, "max": 81.57937622070312, "pos_frac": 0.875, "sample": [81.57937622070312, -5.58349609375, -27.56195068359375, 27.944610595703125, -2.2121353149414062, 19.309242248535156, 1.5364151000976562, 16.41156005859375, 6.355857849121094, 46.14433288574219, 15.226665496826172, 34.840606689453125, 17.31610870361328, 17.234283447265625, 12.289947509765625, 24.618072509765625, 44.826751708984375, 14.721221923828125, 36.72552490234375, 6.660951614379883, 21.169063568115234, -5.375587463378906, 1.6315498352050781, 3.805694580078125, 27.125625610351562, 37.79911804199219, 11.396167755126953, 38.02099609375, -4.5058135986328125, 9.628341674804688, 8.0665283203125, 12.144815444946289, 44.46588134765625, 9.916606903076172, 54.321868896484375, 14.534584045410156, 11.569557189941406, 21.453826904296875, 23.248260498046875, 52.548118591308594, 12.276374816894531, 0.4803619384765625, 10.093420028686523, 8.156177520751953, 53.38032531738281, 13.603347778320312, 19.297210693359375, 13.945281982421875, 36.618896484375, 1.830587387084961, 38.55876159667969, 18.82400894165039, 34.60466766357422, 37.77738952636719, 18.429122924804688, 3.535724639892578, -11.345108032226562, 18.347339630126953, 9.58256721496582, 21.59703826904297, -1.3960609436035156, 33.42932891845703, -2.0730056762695312, 0.8545455932617188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000111.npy"}
|
|
{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 13.259477615356445, "std": 15.364657402038574, "min": -21.741172790527344, "p10": -3.7061618804931635, "median": 10.741082191467285, "p90": 37.50633850097657, "max": 53.36078643798828, "pos_frac": 0.84375, "sample": [9.764759063720703, 46.902374267578125, 25.727676391601562, -2.792003631591797, -9.92013168334961, 11.717405319213867, 12.937736511230469, 11.9197998046875, 3.0902862548828125, -4.684051513671875, 43.767974853515625, 36.67628479003906, 1.3437385559082031, 3.8990402221679688, 37.95904541015625, 33.9885368347168, 17.07935333251953, 25.91358184814453, 20.181644439697266, 17.772850036621094, 24.869094848632812, 28.70608139038086, 29.46137237548828, 7.603813171386719, 15.111869812011719, 18.77992057800293, 14.719879150390625, 2.405475616455078, -7.210670471191406, 11.848747253417969, 1.8064098358154297, -3.3990440368652344, 11.721443176269531, 39.796669006347656, 53.36078643798828, -6.584877014160156, 20.99903106689453, 8.379207611083984, 25.781570434570312, 4.471385955810547, -21.741172790527344, 37.86207580566406, 5.487209320068359, 3.8497467041015625, 6.422813415527344, 3.967723846435547, 22.43376922607422, 2.5390357971191406, 15.434669494628906, 38.3857421875, 3.3682479858398438, 1.9244422912597656, 23.135385513305664, -3.8377838134765625, 2.9487171173095703, -4.568565368652344, 3.2585792541503906, 0.7556686401367188, 26.81310272216797, 0.7981662750244141, 9.681451797485352, 5.152345657348633, -1.2239990234375, 19.885101318359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000112.npy"}
|
|
{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 14.937784194946289, "std": 13.543709754943848, "min": -7.895599365234375, "p10": -2.069530296325683, "median": 13.42149543762207, "p90": 34.40156402587892, "max": 43.011505126953125, "pos_frac": 0.859375, "sample": [-7.895599365234375, 27.30297088623047, -1.5990962982177734, 29.920654296875, 7.6568756103515625, 8.673973083496094, 16.79857063293457, 24.768943786621094, 11.938127517700195, 6.031898498535156, 3.6355819702148438, 23.87701416015625, 20.585891723632812, 40.165985107421875, 12.679227828979492, -4.684965133666992, 8.977581024169922, -4.636344909667969, 9.862037658691406, 42.709136962890625, -5.8679656982421875, 17.09160614013672, 27.508079528808594, 1.4591827392578125, 22.696510314941406, 13.388004302978516, 7.761007308959961, 36.361244201660156, 14.400062561035156, -3.3894424438476562, 6.595344543457031, 40.31024932861328, 14.958904266357422, -1.0915908813476562, 13.454986572265625, 27.099510192871094, 6.8952178955078125, 28.85790252685547, 30.140594482421875, 7.5136260986328125, 21.959259033203125, 31.40612030029297, 1.0389118194580078, 2.850362777709961, -4.90399169921875, 27.997920989990234, 15.705284118652344, 15.9180908203125, -2.2711448669433594, 2.5880470275878906, 43.011505126953125, 3.4605369567871094, 16.744796752929688, 29.380292892456055, 35.685325622558594, 27.649959564208984, 20.42652702331543, 36.54020690917969, 0.5989246368408203, 8.691726684570312, 6.470703125, 2.218547821044922, 8.62735366821289, 21.311416625976562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000113.npy"}
|
|
{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 20.203205108642578, "std": 20.695791244506836, "min": -8.657783508300781, "p10": -0.3929000854492183, "median": 18.24544334411621, "p90": 40.8492790222168, "max": 96.96954345703125, "pos_frac": 0.890625, "sample": [57.538299560546875, 45.64160919189453, 29.70200538635254, 38.75945281982422, 8.195175170898438, 53.70361328125, 4.5404205322265625, 19.56237030029297, 7.69989013671875, 19.939117431640625, -7.385564804077148, 18.781768798828125, 29.625587463378906, 1.6256179809570312, -2.2399139404296875, 31.063156127929688, 5.624523162841797, -0.5807685852050781, 17.709117889404297, 35.67584228515625, 11.049076080322266, 8.73251724243164, -4.73724365234375, 40.09630584716797, 29.16158676147461, 31.973854064941406, 12.501518249511719, 11.006359100341797, 5.30108642578125, 27.794063568115234, 28.59941864013672, 1.6344757080078125, -0.8755645751953125, 29.134841918945312, -2.193807601928711, 5.05113410949707, 8.949384689331055, 19.934783935546875, 34.81175231933594, 27.211212158203125, 25.256732940673828, 29.22875213623047, 3.1364212036132812, 17.579313278198242, 12.35329818725586, 2.3945388793945312, 82.09562683105469, 2.4825973510742188, 20.379287719726562, 66.8600845336914, 3.8777809143066406, 29.000898361206055, 0.17650604248046875, 0.045459747314453125, 96.96954345703125, 36.9603385925293, 2.4127464294433594, 18.869781494140625, 27.737991333007812, -8.657783508300781, 7.310340881347656, 41.17198181152344, 4.016613006591797, 31.028213500976562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000114.npy"}
|
|
{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 15.270513534545898, "std": 18.626760482788086, "min": -28.11772918701172, "p10": -1.5947526931762683, "median": 11.810723304748535, "p90": 44.182558441162115, "max": 80.57516479492188, "pos_frac": 0.84375, "sample": [5.185342788696289, -2.193899154663086, 45.97257614135742, 16.539276123046875, 8.067718505859375, 1.3582839965820312, 2.267333984375, 5.750560760498047, 0.6764926910400391, 14.672019958496094, 6.705661773681641, -2.8873062133789062, 22.07503890991211, 18.732566833496094, 23.163835525512695, 54.46513366699219, 13.386232376098633, 3.4476699829101562, 80.57516479492188, -4.48194694519043, 22.55359649658203, 1.0098724365234375, 46.24891662597656, 25.441112518310547, 43.18328857421875, 3.1939849853515625, 18.1983642578125, 26.747974395751953, 1.6853694915771484, 26.668502807617188, 50.64814758300781, 28.609588623046875, 11.320655822753906, -28.11772918701172, 5.4793243408203125, 18.795455932617188, 21.071212768554688, 8.422500610351562, 12.300790786743164, 21.21538543701172, 14.416000366210938, 13.987960815429688, 20.386245727539062, 9.164505004882812, 54.085784912109375, -0.089202880859375, 7.4886322021484375, 5.9810943603515625, 0.5095977783203125, 6.2611236572265625, 8.944036483764648, 30.32122802734375, -0.45839691162109375, 21.30129623413086, -5.443401336669922, 44.610816955566406, 28.1640625, -2.0817623138427734, 30.348220825195312, -23.7469482421875, 28.6058349609375, -0.23140716552734375, 4.394174575805664, 2.2392578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000115.npy"}
|
|
{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 15.100841522216797, "std": 13.967205047607422, "min": -6.018527984619141, "p10": 0.050526809692383434, "median": 13.340904235839844, "p90": 35.974467468261736, "max": 73.95278930664062, "pos_frac": 0.890625, "sample": [5.676214218139648, 6.251064300537109, -6.018527984619141, -0.20771026611328125, 14.536825180053711, 14.437187194824219, 12.530441284179688, 9.759511947631836, 21.780248641967773, 22.31513214111328, 8.965263366699219, 1.9864253997802734, 15.783355712890625, 14.189064025878906, 1.209686279296875, -2.952423095703125, 17.59131622314453, 8.093299865722656, 25.451881408691406, 37.572113037109375, 11.593605041503906, 17.872848510742188, 7.397491455078125, 1.8927383422851562, 19.238677978515625, 10.587509155273438, 23.32489013671875, 37.7017822265625, 5.144233703613281, 6.6038055419921875, 43.82014465332031, 20.11621856689453, 10.382802963256836, 14.1513671875, -0.6609516143798828, -3.2892684936523438, 10.914497375488281, 15.34149169921875, 11.591726303100586, 38.449066162109375, 1.2332134246826172, 32.24662780761719, 9.071174621582031, 22.906631469726562, 19.095306396484375, 5.086585998535156, 30.196083068847656, 8.262252807617188, -2.3177242279052734, 24.126754760742188, 19.45962905883789, -3.9874916076660156, 73.95278930664062, 18.633556365966797, 16.14718246459961, 17.445697784423828, 12.098320007324219, 14.347335815429688, 1.6884632110595703, 7.93310546875, 0.6530799865722656, 28.158477783203125, 38.60652160644531, 40.28522491455078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000116.npy"}
|
|
{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 14.997328758239746, "std": 15.858565330505371, "min": -13.7943115234375, "p10": -0.38811874389648404, "median": 12.972740173339844, "p90": 32.7546989440918, "max": 68.41575622558594, "pos_frac": 0.875, "sample": [8.30661392211914, 15.071149826049805, 15.525833129882812, 9.851924896240234, 9.814170837402344, 4.699462890625, 13.339231491088867, 3.0282135009765625, 7.74481201171875, 14.988653182983398, 22.883190155029297, 32.84397888183594, 31.21820068359375, 68.41575622558594, 25.267967224121094, 8.887657165527344, 0.0006561279296875, 29.663925170898438, 27.667221069335938, -13.7943115234375, -5.2970733642578125, 4.421684265136719, 13.133987426757812, 21.514537811279297, 24.96084976196289, 3.4510955810546875, 11.54754638671875, 1.1403579711914062, 16.504989624023438, 6.547626495361328, 17.181791305541992, 43.66077423095703, -0.03972625732421875, 31.169845581054688, 37.748023986816406, 8.482269287109375, 66.80368041992188, 16.261428833007812, 4.6029815673828125, 16.270343780517578, 1.7730293273925781, 2.428234100341797, 3.722352981567383, -2.6559600830078125, -8.680747985839844, 6.855152130126953, 32.54637908935547, 15.519638061523438, 37.635719299316406, 15.857294082641602, -5.212799072265625, 2.605152130126953, 3.948169708251953, 24.78579330444336, 46.561248779296875, 24.736572265625, 5.908605575561523, 20.836929321289062, 12.811492919921875, 17.94170379638672, 12.17626953125, -0.5374298095703125, -4.9058685302734375, 17.680770874023438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000117.npy"}
|
|
{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 13.979833602905273, "std": 12.897310256958008, "min": -7.94024658203125, "p10": 0.25027084350585965, "median": 12.852465629577637, "p90": 28.41146125793457, "max": 53.79058837890625, "pos_frac": 0.90625, "sample": [-1.343170166015625, 8.64689826965332, 10.464056015014648, -6.062957763671875, 11.681676864624023, 14.970149993896484, 24.065513610839844, 22.28004264831543, 25.496854782104492, 28.39868927001953, 25.157974243164062, 8.977386474609375, 40.87542724609375, 10.548057556152344, 2.4372406005859375, 15.538808822631836, 5.607349395751953, 6.2277984619140625, 15.162738800048828, 53.79058837890625, 23.250534057617188, -0.9755229949951172, 0.5365753173828125, 4.4899139404296875, -7.94024658203125, 7.716344833374023, 3.4729347229003906, 9.873025894165039, 0.5966453552246094, 3.8109588623046875, 8.359931945800781, 18.121665954589844, -6.473419189453125, 28.416934967041016, 8.1749267578125, 15.806625366210938, 9.382637023925781, 14.452545166015625, 41.49578857421875, 3.8924179077148438, 4.443153381347656, -2.507549285888672, 14.160614013671875, 14.02325439453125, 40.96595764160156, 27.824424743652344, 44.0472412109375, 22.689773559570312, 30.798622131347656, 9.03089714050293, 1.7185478210449219, 21.86396026611328, 22.399017333984375, 17.142913818359375, 18.567657470703125, 15.353843688964844, 14.936513900756836, 7.059318542480469, 1.3912544250488281, 18.361045837402344, 15.302215576171875, 25.100868225097656, 0.13556671142578125, 0.517913818359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000118.npy"}
|
|
{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 14.818709373474121, "std": 16.255353927612305, "min": -11.993217468261719, "p10": -4.438190460205078, "median": 12.3642578125, "p90": 35.478094482421874, "max": 72.39674377441406, "pos_frac": 0.84375, "sample": [14.991790771484375, 9.237899780273438, 4.838104248046875, 47.9593505859375, 29.0115966796875, 3.6142578125, 8.64773178100586, 19.116798400878906, -2.384014129638672, 25.51844024658203, 7.190559387207031, 3.5510940551757812, 53.09906005859375, 25.977645874023438, 0.2644081115722656, 39.58892822265625, 12.987255096435547, 7.470680236816406, 72.39674377441406, 23.883712768554688, 18.974136352539062, 26.86359405517578, 25.71691131591797, 5.908237457275391, 12.83984375, 17.056564331054688, -11.5667724609375, 3.8379287719726562, 24.977096557617188, 24.557525634765625, 7.1673431396484375, -4.087394714355469, 35.06044006347656, 10.915790557861328, 35.65708923339844, -9.156883239746094, 20.29950714111328, 13.859054565429688, 2.368257522583008, 5.481048583984375, 13.650588989257812, 7.686637878417969, -4.588531494140625, 36.484336853027344, 10.52044677734375, 18.375946044921875, 10.265029907226562, -8.465560913085938, 31.163475036621094, 23.51260757446289, -9.629257202148438, -9.35833740234375, 11.606529235839844, 21.783203125, 22.0162353515625, 16.203779220581055, 19.746986389160156, 5.204174041748047, 45.5733528137207, -0.04185676574707031, -11.993217468261719, 1.6483001708984375, 11.888671875, 11.452468872070312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000119.npy"}
|
|
{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 18.60680389404297, "std": 17.107524871826172, "min": -12.079910278320312, "p10": -1.066276168823242, "median": 17.023963928222656, "p90": 43.022140502929695, "max": 60.889739990234375, "pos_frac": 0.875, "sample": [56.59379577636719, 26.372711181640625, 19.702072143554688, 43.530487060546875, 12.89767837524414, 10.285863876342773, -1.02099609375, 41.73074722290039, 12.835441589355469, 20.169410705566406, 28.49066162109375, 55.17486572265625, 23.18999481201172, 18.552810668945312, 3.7882423400878906, 14.7431640625, 31.477981567382812, 10.963310241699219, 22.74517059326172, -4.163825988769531, -10.529975891113281, 53.31547927856445, 21.048019409179688, 23.925495147705078, 19.286060333251953, 4.1808624267578125, 12.4178466796875, 8.026466369628906, 41.83599853515625, -12.079910278320312, -4.1842041015625, 29.546600341796875, 21.273887634277344, 1.9191093444824219, 40.15351867675781, 1.9482784271240234, 56.386444091796875, 24.10666275024414, 23.146804809570312, 0.22762107849121094, -1.0856819152832031, 4.721706390380859, 26.67822265625, 33.62432861328125, 17.193557739257812, 16.8543701171875, 44.46913146972656, 60.889739990234375, 13.230636596679688, 17.608234405517578, 0.6151046752929688, 23.23064422607422, -1.91455078125, 16.118194580078125, 16.14234161376953, 5.0640716552734375, 7.729911804199219, 28.19989013671875, 15.196334838867188, 2.7728652954101562, 27.83234405517578, -4.546104431152344, 12.9696044921875, 3.2298355102539062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000120.npy"}
|
|
{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 19.640243530273438, "std": 18.757091522216797, "min": -6.694976806640625, "p10": -1.167340850830078, "median": 15.933277130126953, "p90": 46.09449844360353, "max": 68.93438720703125, "pos_frac": 0.875, "sample": [2.8923702239990234, 9.991462707519531, 1.0039520263671875, 56.16680908203125, -2.429248809814453, 3.8791275024414062, 27.431320190429688, -1.3301620483398438, 44.126853942871094, 27.841690063476562, 9.162132263183594, 23.651229858398438, 49.87205505371094, 43.46564483642578, 18.747596740722656, -5.748649597167969, 17.239662170410156, 30.02779769897461, 24.79637908935547, 0.7359790802001953, 10.681783676147461, 43.539249420166016, -2.4996414184570312, 27.06927490234375, 46.937774658203125, 18.906967163085938, 13.365890502929688, 0.6294746398925781, 38.936431884765625, 4.01268196105957, 14.62689208984375, 3.7208633422851562, 26.85662078857422, 18.568328857421875, 6.32465934753418, 42.80253601074219, 1.8496475219726562, 52.63299560546875, 14.597991943359375, 68.93438720703125, 0.90576171875, 40.556053161621094, -0.9606552124023438, 11.39959716796875, 8.1170654296875, 9.651435852050781, 37.29691696166992, 5.288856506347656, 13.381568908691406, 18.047470092773438, 6.681316375732422, 2.0669937133789062, 17.74365234375, 33.76263427734375, 47.85157775878906, 39.785888671875, -6.694976806640625, 21.06768798828125, -1.25592041015625, 29.61072540283203, 64.88694763183594, 6.509529113769531, -3.511798858642578, 20.768386840820312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000121.npy"}
|
|
{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 24.55230712890625, "std": 18.17508316040039, "min": -8.086063385009766, "p10": 3.2047359466552736, "median": 21.91520881652832, "p90": 48.14146423339844, "max": 71.13600158691406, "pos_frac": 0.9375, "sample": [34.23554992675781, 3.4979095458984375, 13.026756286621094, 34.33427429199219, 10.734800338745117, 17.398590087890625, 38.56182861328125, 31.859886169433594, 18.53940200805664, 38.68653869628906, 7.842874526977539, 36.79210662841797, 32.412925720214844, 59.738525390625, 41.74085235595703, 22.076496124267578, 7.2319183349609375, 34.89275360107422, 48.4869384765625, 29.8126220703125, 1.9067230224609375, 47.335357666015625, 5.253932952880859, 5.612266540527344, 16.399246215820312, 19.36756134033203, 15.879035949707031, 41.167449951171875, 27.121665954589844, 25.96851348876953, 33.4615478515625, -5.545175552368164, 58.983055114746094, 2.4793834686279297, 21.636520385742188, 52.31475830078125, 24.099273681640625, 32.392616271972656, 32.10906219482422, 46.48365783691406, 7.423131942749023, 17.46185302734375, -8.086063385009766, 62.56268310546875, 21.185623168945312, 18.60052490234375, 20.26526641845703, 6.694103240966797, 30.7193603515625, -5.7531890869140625, -0.44530487060546875, 11.025421142578125, 5.255655288696289, 42.608978271484375, 71.13600158691406, 4.511287689208984, 40.71983337402344, 21.753921508789062, 3.079090118408203, 3.508525848388672, 48.7806396484375, 33.36979675292969, 13.21359634399414, 35.42689514160156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000122.npy"}
|
|
{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 17.00613021850586, "std": 19.7613582611084, "min": -29.73029327392578, "p10": -2.6705297470092764, "median": 15.122533798217773, "p90": 40.807359313964845, "max": 75.99698638916016, "pos_frac": 0.875, "sample": [26.45958709716797, 24.409500122070312, 15.730567932128906, 3.9194869995117188, 13.23223876953125, 16.452377319335938, 34.5545654296875, 26.7449951171875, 28.143577575683594, 19.67363739013672, 1.2033119201660156, 11.919578552246094, 8.824615478515625, 0.6778030395507812, 53.42677307128906, 21.323074340820312, 11.252971649169922, 3.6215438842773438, 0.19303131103515625, 17.16413116455078, 3.6249542236328125, 53.617393493652344, 23.147140502929688, -23.253433227539062, 10.389392852783203, 26.35662841796875, 75.99698638916016, 4.484046936035156, 39.628997802734375, 44.91233825683594, -3.0034046173095703, 39.017906188964844, 15.128963470458984, 4.327461242675781, 33.92628479003906, 39.66156005859375, 15.116104125976562, 28.620346069335938, 58.44622802734375, -5.743125915527344, 21.63214111328125, 20.9703369140625, -10.748371124267578, 0.2645111083984375, 29.850059509277344, 6.556182861328125, -13.324501037597656, 2.0394458770751953, -6.471855163574219, 64.85545349121094, -1.8938217163085938, 17.422378540039062, 11.544105529785156, 26.625694274902344, 7.164405822753906, 14.313589096069336, 41.29841613769531, 15.942583084106445, -29.73029327392578, 16.761375427246094, 13.970436096191406, 1.7940559387207031, 0.9062976837158203, 13.319562911987305], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000123.npy"}
|
|
{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 21.367599487304688, "std": 23.805362701416016, "min": -17.739791870117188, "p10": -3.344985961914062, "median": 18.257415771484375, "p90": 47.03081359863282, "max": 111.84188842773438, "pos_frac": 0.828125, "sample": [42.20631408691406, -1.5382766723632812, 22.97238540649414, 18.24425506591797, -0.7596454620361328, 51.88459777832031, -17.739791870117188, -0.8712368011474609, 38.903533935546875, 47.6470947265625, 2.9952011108398438, 17.162399291992188, 2.5855865478515625, 18.27057647705078, 18.598480224609375, 90.60543823242188, 36.12818145751953, 8.95254898071289, 12.793380737304688, 27.56768798828125, -5.0108184814453125, -7.123390197753906, 36.35295104980469, 9.423114776611328, 65.07046508789062, 40.7799072265625, 25.792266845703125, 7.420631408691406, 27.950950622558594, 32.23438262939453, -3.5297622680664062, 19.10616111755371, 8.826873779296875, 26.168737411499023, 1.7731857299804688, 81.52078247070312, 28.306568145751953, 47.530792236328125, 9.790214538574219, 41.51263427734375, 34.381568908691406, -2.9138412475585938, 19.129650115966797, -4.23321533203125, 20.006494522094727, 32.973907470703125, -10.66583251953125, 6.8385467529296875, 9.689504623413086, 9.391357421875, 22.207813262939453, 18.853805541992188, -8.630905151367188, 42.121490478515625, 12.558258056640625, 111.84188842773438, 6.084930419921875, 45.86419677734375, 2.6063785552978516, 14.720039367675781, 17.98236846923828, 18.367515563964844, 8.802200317382812, 9.04279899597168], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000124.npy"}
|
|
{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 19.78438949584961, "std": 17.413667678833008, "min": -8.573371887207031, "p10": 2.801374435424805, "median": 16.266315460205078, "p90": 47.07278709411624, "max": 67.39120483398438, "pos_frac": 0.9375, "sample": [3.3886375427246094, 18.590194702148438, 15.944869995117188, 1.3178939819335938, 9.248931884765625, 7.580709457397461, 37.60414123535156, -0.8243293762207031, 50.589508056640625, 11.082023620605469, 0.13245773315429688, -6.8089447021484375, 8.961671829223633, 11.981330871582031, 30.17736053466797, 24.2601318359375, 20.9459228515625, 16.58776092529297, 5.08251953125, 10.13980484008789, 49.4296989440918, 9.953149795532227, 10.6654052734375, 16.79052734375, 34.18962097167969, 67.39120483398438, -8.573371887207031, 10.472591400146484, 17.690345764160156, 4.2638092041015625, 11.151451110839844, -8.433158874511719, 10.559577941894531, 3.696775436401367, 17.383739471435547, 61.409393310546875, 4.165805816650391, 54.498802185058594, 36.15602111816406, 5.557880401611328, 6.134864807128906, 41.573326110839844, 36.109375, 36.285560607910156, 49.94380187988281, 21.925704956054688, 32.698631286621094, 9.941072463989258, 39.082305908203125, 25.456321716308594, 11.076995849609375, 31.980297088623047, 12.1964111328125, 7.169374465942383, 23.75092315673828, 17.7181396484375, 25.313461303710938, 22.59130859375, 10.355268478393555, 18.06805419921875, 57.408111572265625, 35.255615234375, 7.214332580566406, 2.5496902465820312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000125.npy"}
|
|
{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 19.074655532836914, "std": 17.02735710144043, "min": -13.571517944335938, "p10": 0.8240037918090828, "median": 17.302658081054688, "p90": 41.606278991699234, "max": 69.21311950683594, "pos_frac": 0.90625, "sample": [37.51531982421875, 23.695823669433594, 5.277423858642578, 27.539779663085938, 6.719573974609375, 12.013954162597656, -3.0527725219726562, 1.5326061248779297, 13.279571533203125, 62.48492431640625, 19.0299072265625, 12.905872344970703, 34.25006103515625, 20.839111328125, 22.336143493652344, -0.1916046142578125, 36.8443603515625, 13.674705505371094, 32.25060272216797, 19.00194549560547, 24.236385345458984, 17.65444564819336, 1.6117191314697266, 8.647930145263672, 18.71637725830078, 44.87945556640625, 1.9217071533203125, 42.90625, 19.36441421508789, 17.775920867919922, 11.002124786376953, 5.900428771972656, 9.19281005859375, 7.200767517089844, 38.57301330566406, 19.532791137695312, 30.35523223876953, -4.7061309814453125, 25.911361694335938, 16.950870513916016, -0.9510040283203125, 6.56121826171875, 57.919342041015625, 12.023393630981445, 13.587274551391602, 5.157833099365234, 8.582279205322266, 6.234992980957031, 30.482337951660156, 44.95063781738281, -13.571517944335938, 69.21311950683594, 20.951568603515625, 12.086441040039062, 8.512405395507812, 25.834304809570312, 18.720779418945312, 28.368576049804688, 10.396602630615234, -5.3748779296875, 0.5203170776367188, 35.45446014404297, 10.281166076660156, 59.2611083984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000126.npy"}
|
|
{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 16.056873321533203, "std": 14.14534854888916, "min": -8.831474304199219, "p10": -1.7270090103149411, "median": 15.299047470092773, "p90": 37.327684020996095, "max": 49.2900390625, "pos_frac": 0.859375, "sample": [16.423660278320312, 29.278724670410156, 0.23836898803710938, 15.320930480957031, -0.33263397216796875, -4.306343078613281, 40.07428741455078, 7.43670654296875, 38.31462860107422, 8.747650146484375, 5.41485595703125, 19.43762969970703, 30.03857421875, -3.7366714477539062, 12.651744842529297, 6.857349395751953, 16.04125213623047, 1.3340301513671875, -5.157440185546875, 33.643341064453125, 10.868614196777344, 36.37457275390625, 28.18244171142578, 15.815948486328125, 29.70968246459961, 43.66114044189453, 20.896381378173828, 23.40777587890625, 8.403129577636719, 15.664745330810547, 41.012229919433594, 15.078483581542969, 29.570556640625, 10.520336151123047, 6.7598419189453125, 25.242332458496094, -1.81494140625, 4.773681640625, 15.277164459228516, -1.5218334197998047, 5.846736907958984, 37.73616027832031, 5.679058074951172, 15.163490295410156, 20.768600463867188, 22.12140655517578, 21.971435546875, 33.06840515136719, 24.49602508544922, 20.876983642578125, -6.4668731689453125, 14.202919006347656, 6.613210678100586, 43.42048645019531, -8.831474304199219, 9.122200012207031, 1.1606521606445312, 49.2900390625, 20.574249267578125, 4.3011016845703125, 17.29647445678711, 23.94442367553711, -3.73956298828125, 3.42071533203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000127.npy"}
|
|
{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 16.93927001953125, "std": 17.68482208251953, "min": -11.791961669921875, "p10": -1.4636808395385739, "median": 11.172279357910156, "p90": 44.822631454467775, "max": 73.08222961425781, "pos_frac": 0.859375, "sample": [13.191211700439453, 9.90194320678711, 39.548057556152344, -10.636039733886719, 43.8994140625, -3.5574913024902344, 45.21829605102539, 39.42198944091797, 8.616996765136719, 11.184623718261719, 46.570037841796875, 10.376312255859375, 22.026771545410156, 9.218502044677734, 1.6379013061523438, 31.526580810546875, 24.100692749023438, 3.7650146484375, 5.575553894042969, 27.200786590576172, -7.291168212890625, 0.06180572509765625, 73.08222961425781, -11.791961669921875, 47.99957275390625, 9.9425048828125, 32.23724365234375, 42.30828094482422, 11.159934997558594, 14.681028366088867, -1.6480903625488281, 0.7930908203125, 26.977705001831055, 8.723419189453125, 1.5594863891601562, 9.381134033203125, 8.328826904296875, 22.72745132446289, 8.447433471679688, 26.038715362548828, 16.39471435546875, -1.652811050415039, 31.71685791015625, -4.802865982055664, 2.468568801879883, 51.36283874511719, 1.9582958221435547, 31.913192749023438, 13.787864685058594, 18.541603088378906, 9.273818969726562, 9.163528442382812, 14.813873291015625, -1.0333919525146484, 7.417442321777344, 13.157180786132812, 9.494720458984375, 22.509033203125, 18.983352661132812, -0.781097412109375, 19.70758056640625, 48.5096435546875, 48.23308563232422, 0.4703636169433594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000128.npy"}
|
|
{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 17.114233016967773, "std": 20.050907135009766, "min": -32.344268798828125, "p10": -3.533080863952636, "median": 14.297990798950195, "p90": 41.12254257202149, "max": 80.16046142578125, "pos_frac": 0.8125, "sample": [-2.4986400604248047, 12.493734359741211, -3.826265335083008, 40.99700164794922, 2.3004913330078125, 20.597259521484375, 3.8612213134765625, 0.424957275390625, 39.80818176269531, 17.945316314697266, 22.66339111328125, 29.94354248046875, 8.57071304321289, 20.160781860351562, 18.40694808959961, 24.813201904296875, 58.1282958984375, 2.420654296875, 6.166961669921875, 29.744680404663086, 47.969482421875, 31.17443084716797, 35.82398986816406, -15.976036071777344, 18.09044647216797, -8.508514404296875, 3.9539260864257812, 9.632244110107422, 80.16046142578125, 47.33600616455078, 11.702682495117188, 20.836807250976562, -2.8489837646484375, 30.77739715576172, -3.9326858520507812, 3.302165985107422, 7.908683776855469, 37.312408447265625, 8.914619445800781, 0.09697723388671875, -6.320915222167969, 10.500150680541992, 13.474990844726562, 35.055477142333984, 54.99156188964844, -32.344268798828125, 9.254053115844727, 4.871984481811523, -0.682373046875, -1.4969062805175781, 33.049339294433594, 51.80754089355469, 22.620872497558594, 18.51996612548828, 28.103126525878906, 4.258995056152344, 32.355567932128906, 15.120990753173828, 1.9589157104492188, 22.898284912109375, 34.51396179199219, -12.777015686035156, 41.17634582519531, -2.4487075805664062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000129.npy"}
|
|
{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 22.913349151611328, "std": 20.445497512817383, "min": -24.745529174804688, "p10": -1.164003562927246, "median": 20.502984046936035, "p90": 49.86514434814453, "max": 63.07379913330078, "pos_frac": 0.875, "sample": [29.366539001464844, 23.928112030029297, 3.8792037963867188, 48.81367492675781, 6.154655456542969, 12.646224975585938, 41.36921691894531, 41.07319641113281, 4.964935302734375, 1.962646484375, 16.977964401245117, 12.214794158935547, 19.51488494873047, 34.83164978027344, 63.07379913330078, 6.380317687988281, 49.93391418457031, -1.2131290435791016, 18.40494155883789, 37.18635559082031, 7.156459808349609, 11.51953125, -1.689453125, 29.192733764648438, 35.49224853515625, 13.653312683105469, 24.712806701660156, 51.91169738769531, 23.602935791015625, 55.001007080078125, 46.58654022216797, 48.959869384765625, -1.5593605041503906, 7.3712921142578125, 34.03782653808594, -24.745529174804688, 12.053873062133789, 12.15843391418457, 14.081672668457031, 2.2280120849609375, 24.853912353515625, 20.852428436279297, 36.170066833496094, 58.8460693359375, 54.08715057373047, -6.533323287963867, 62.45941162109375, -7.702522277832031, -1.04937744140625, 18.636581420898438, 28.06100845336914, 11.40829849243164, 49.655784606933594, 6.873863220214844, 35.88789367675781, -18.781837463378906, 41.00047302246094, 3.0073795318603516, 21.582473754882812, 46.519317626953125, 49.704681396484375, 30.856590270996094, 20.153539657592773, 6.7147064208984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000130.npy"}
|
|
{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 20.071718215942383, "std": 17.16399574279785, "min": -14.5615234375, "p10": 2.0814064025878922, "median": 17.705707550048828, "p90": 45.24000930786133, "max": 65.72723388671875, "pos_frac": 0.9375, "sample": [20.265316009521484, 15.916746139526367, 20.460702896118164, 16.149974822998047, 51.08819580078125, -14.5615234375, 5.886669158935547, 13.602874755859375, 22.685531616210938, 29.730728149414062, 48.333282470703125, 10.893867492675781, 10.730361938476562, 4.761320114135742, 17.574783325195312, 3.96563720703125, 9.071056365966797, -2.3183155059814453, 39.1544189453125, 18.391632080078125, 20.851776123046875, 41.03276062011719, 11.066783905029297, 1.1534805297851562, 17.836631774902344, 3.4603271484375, 45.27325439453125, 5.94611930847168, 34.6700439453125, 23.287065505981445, 13.440488815307617, -12.204208374023438, 45.162437438964844, 1.4904403686523438, 65.72723388671875, 0.7744102478027344, 17.407455444335938, 5.953327178955078, 48.012481689453125, 15.732614517211914, 23.92828369140625, 19.28866958618164, 61.241546630859375, 9.694644927978516, -7.805107116699219, 26.1768798828125, 7.0194549560546875, 27.46283721923828, 3.653169631958008, 11.74282455444336, 4.330684661865234, 30.779266357421875, 9.120361328125, 49.11585998535156, 27.735549926757812, 37.401397705078125, 25.28839874267578, 24.677772521972656, 8.221590042114258, 24.681625366210938, 19.875633239746094, 40.038978576660156, 43.001129150390625, 10.06036376953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000131.npy"}
|
|
{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 19.139083862304688, "std": 17.331829071044922, "min": -5.791587829589844, "p10": 1.309801483154297, "median": 15.264236450195312, "p90": 43.261308288574234, "max": 73.47293090820312, "pos_frac": 0.921875, "sample": [9.561504364013672, 4.311084747314453, 48.27348327636719, -5.791587829589844, 17.93781280517578, 27.664024353027344, 21.380847930908203, 71.065185546875, 61.889068603515625, 5.682643890380859, 20.95184326171875, 23.694984436035156, 1.119913101196289, 9.073272705078125, -0.0846710205078125, 11.958438873291016, 39.943603515625, 12.324390411376953, 16.161651611328125, 4.557992935180664, 5.933811187744141, 3.0295791625976562, 29.50843048095703, 37.170310974121094, 1.3752822875976562, 13.6270751953125, 5.760957717895508, 31.21918487548828, 1.28173828125, 9.503303527832031, 13.924308776855469, 2.8864593505859375, 6.315860748291016, 24.19707489013672, 32.33559799194336, 30.045997619628906, 17.53569793701172, 12.303211212158203, 26.225040435791016, 25.44662094116211, 16.9116268157959, 20.504226684570312, 26.400272369384766, 35.302955627441406, -4.286506652832031, 13.438518524169922, 46.133277893066406, -4.345466613769531, 5.882717132568359, 32.206520080566406, -1.3132953643798828, 28.100936889648438, 44.68318176269531, 4.0767974853515625, 13.047088623046875, 6.9593658447265625, 19.548583984375, 23.065994262695312, 14.3668212890625, 9.787796020507812, 47.278228759765625, 2.9474143981933594, 19.43030548095703, 73.47293090820312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000132.npy"}
|
|
{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 17.867952346801758, "std": 19.537412643432617, "min": -13.41998291015625, "p10": -0.38848419189453115, "median": 13.57284927368164, "p90": 45.056110382080085, "max": 73.07952880859375, "pos_frac": 0.875, "sample": [19.526992797851562, 41.11528015136719, 14.015823364257812, 26.774612426757812, 5.365390777587891, -4.2296295166015625, 7.66552734375, 5.6786956787109375, -11.410308837890625, 53.34089660644531, 16.993896484375, 25.28357696533203, 1.272979736328125, 45.65874481201172, -13.066299438476562, 0.9813461303710938, 20.544464111328125, -0.43719482421875, 60.083740234375, 10.686002731323242, 37.53512191772461, 11.343536376953125, 3.9093170166015625, 33.60771179199219, 1.8182754516601562, 35.98521423339844, 14.656394958496094, 23.798099517822266, 33.606712341308594, 16.765396118164062, 13.129875183105469, 4.718051910400391, 11.221418380737305, 63.82679748535156, 3.39892578125, 23.10993194580078, 22.957809448242188, 0.9936370849609375, 55.71124267578125, 22.813922882080078, 33.045047760009766, 14.501899719238281, 5.833681106567383, 73.07952880859375, -13.41998291015625, 1.2357254028320312, 21.639144897460938, 47.266937255859375, -0.2748260498046875, 5.929676055908203, 9.717544555664062, 32.19117736816406, -10.828323364257812, 43.64996337890625, 27.369178771972656, -3.1688079833984375, 43.53074645996094, 12.948684692382812, 22.58885955810547, 7.746040344238281, 3.0234222412109375, 0.14175796508789062, 3.874032974243164, 1.1759490966796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000133.npy"}
|
|
{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 19.3120174407959, "std": 15.649565696716309, "min": -2.265575408935547, "p10": 3.743128395080567, "median": 12.93411636352539, "p90": 42.590901184082036, "max": 61.168853759765625, "pos_frac": 0.984375, "sample": [3.50262451171875, 4.396240234375, 12.793724060058594, 43.18390655517578, 60.981475830078125, 8.696495056152344, 18.810733795166016, 7.046581268310547, 6.410602569580078, 13.681751251220703, 22.953903198242188, 9.611648559570312, 5.468992233276367, 7.6067962646484375, 20.354347229003906, 14.531524658203125, 4.304304122924805, 27.947052001953125, 23.641677856445312, 6.192859649658203, 7.686985015869141, 1.7719879150390625, 11.772514343261719, 21.581180572509766, 56.92341613769531, 26.80487823486328, 41.118896484375, 44.405941009521484, 27.233123779296875, 10.721317291259766, 3.3946380615234375, 36.15673828125, 7.790691375732422, 12.417705535888672, 33.952117919921875, 41.20722198486328, 52.62232208251953, 5.165363311767578, 22.545513153076172, 7.466609954833984, 7.97650146484375, 13.074508666992188, 9.925346374511719, 7.656904220581055, 61.168853759765625, 18.44516372680664, 28.683151245117188, 9.545646667480469, 28.226333618164062, 2.9520626068115234, 12.618431091308594, 3.4773101806640625, 1.0598678588867188, 46.38372802734375, 33.94384765625, 20.79448699951172, 26.448684692382812, 7.019184112548828, 12.283912658691406, 24.218650817871094, -2.265575408935547, 18.805850982666016, 11.926738739013672, 36.74310302734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000134.npy"}
|
|
{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 20.7138614654541, "std": 19.68378257751465, "min": -16.650279998779297, "p10": 1.4211303710937504, "median": 16.666990280151367, "p90": 49.81827011108399, "max": 70.25750732421875, "pos_frac": 0.9375, "sample": [23.132003784179688, 27.828262329101562, 1.0589637756347656, 48.211029052734375, 22.381134033203125, 33.36766815185547, 66.48635864257812, 19.31067657470703, 55.568206787109375, 61.91825866699219, 49.28528594970703, 6.974830627441406, 50.04669189453125, 41.188987731933594, 59.07037353515625, 15.44803237915039, 12.641494750976562, 35.1588134765625, 9.172870635986328, 3.1340160369873047, 6.493253707885742, 43.189857482910156, 36.315025329589844, 25.58636474609375, 12.946527481079102, 36.43757629394531, 10.793655395507812, 70.25750732421875, -10.799947738647461, 33.401336669921875, 4.1341094970703125, 54.28875732421875, 46.98667907714844, -16.650279998779297, 11.75863265991211, 6.760753631591797, 12.230178833007812, 19.02142333984375, 18.646041870117188, 4.12872314453125, -8.798187255859375, 3.9366531372070312, 18.794416427612305, 6.431724548339844, 18.896427154541016, 13.702262878417969, 16.374622344970703, 0.07662200927734375, 17.604995727539062, 1.8292999267578125, 6.466184616088867, 14.752281188964844, 23.798812866210938, 10.250600814819336, 1.2462005615234375, 26.5679931640625, 17.472610473632812, 33.9678955078125, 9.206443786621094, -7.19049072265625, 7.0327301025390625, 4.77593994140625, 4.2216644287109375, 16.95935821533203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000135.npy"}
|
|
{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 16.596004486083984, "std": 19.312175750732422, "min": -6.646942138671875, "p10": -2.215103912353514, "median": 12.25448226928711, "p90": 34.04188499450684, "max": 113.54296875, "pos_frac": 0.875, "sample": [20.797786712646484, 18.44683074951172, 26.700225830078125, 17.07642364501953, 4.440177917480469, -3.414287567138672, 49.45396423339844, 33.90541458129883, -6.35546875, 14.487079620361328, 24.154495239257812, 22.46271514892578, 32.44667053222656, 2.912567138671875, 27.280532836914062, 12.515716552734375, 19.006942749023438, 8.451257705688477, 35.7366943359375, 71.3614501953125, -0.7653350830078125, 34.100372314453125, 113.54296875, 6.1587677001953125, 17.54216766357422, 10.221122741699219, 37.41162109375, 9.348011016845703, 53.99676513671875, 0.09586524963378906, 18.32769012451172, 5.402229309082031, 22.348024368286133, 9.295120239257812, 9.87127685546875, 6.8871612548828125, 10.442352294921875, 1.8848190307617188, 20.072219848632812, 14.917984008789062, 12.974002838134766, -6.646942138671875, -3.679168701171875, 29.846389770507812, -3.259187698364258, 3.054311752319336, 7.039089202880859, 21.593843460083008, 31.893474578857422, 25.531768798828125, -2.8364334106445312, 8.940101623535156, 13.664352416992188, 10.307182312011719, 9.332763671875, 5.4732666015625, 6.602508544921875, 4.217418670654297, -4.434501647949219, 0.5456180572509766, 13.69748306274414, 31.833267211914062, 11.993247985839844, 1.491973876953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000136.npy"}
|
|
{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 22.422649383544922, "std": 22.97294044494629, "min": -26.417160034179688, "p10": -2.011648368835448, "median": 19.237220764160156, "p90": 55.12747726440432, "max": 94.377197265625, "pos_frac": 0.859375, "sample": [22.266815185546875, 15.119026184082031, 10.911819458007812, 28.429458618164062, 42.715782165527344, 6.459144592285156, 6.027900695800781, 20.157394409179688, 1.3596382141113281, 46.466880798339844, 7.1747283935546875, 21.990982055664062, 94.377197265625, 6.047054290771484, 17.322853088378906, -5.400627136230469, 24.870208740234375, 14.919807434082031, 12.999889373779297, 50.01258850097656, 57.31957244873047, 31.007713317871094, 2.9586620330810547, 64.1632080078125, 23.535873413085938, 41.25636291503906, 47.101806640625, 20.217575073242188, 2.6119136810302734, 60.009361267089844, -9.03814697265625, 22.4384765625, 19.34821319580078, 9.335060119628906, 43.680145263671875, 0.8161201477050781, 49.000701904296875, 29.408071517944336, -2.4619979858398438, 17.685562133789062, 22.105972290039062, 11.819908142089844, 23.559814453125, -26.417160034179688, -2.5679397583007812, -5.5531768798828125, 33.62591552734375, 4.462299346923828, 19.12622833251953, 74.9710693359375, 14.563751220703125, 27.288375854492188, 5.321691513061523, 39.38702392578125, -0.9608325958251953, -0.15444374084472656, 8.254318237304688, 4.066967010498047, 5.558492660522461, 32.52594757080078, 65.06916809082031, -3.1615447998046875, 41.19105529785156, 66.34384155273438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000137.npy"}
|
|
{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 18.525365829467773, "std": 18.633071899414062, "min": -33.681884765625, "p10": -0.11109848022460778, "median": 16.008777618408203, "p90": 41.25808258056641, "max": 69.5491943359375, "pos_frac": 0.890625, "sample": [39.491180419921875, -4.4503326416015625, 35.484859466552734, 43.410736083984375, 19.295684814453125, 12.923103332519531, 4.2929840087890625, -0.7952766418457031, 2.4225311279296875, 24.4642333984375, 16.65163803100586, 16.181121826171875, 31.103790283203125, 2.23101806640625, 15.113897323608398, -33.681884765625, 9.839311599731445, 1.779541015625, 2.5341835021972656, 15.836433410644531, 38.96942901611328, 47.657569885253906, 3.1077938079833984, 2.89764404296875, 57.82499313354492, 32.185302734375, 4.201072692871094, 6.212413787841797, 26.406410217285156, 40.249237060546875, 23.34906005859375, 5.333707809448242, 21.935138702392578, 9.911163330078125, 46.318077087402344, 41.69044494628906, 8.27703857421875, 26.657054901123047, -7.794441223144531, 10.713264465332031, 24.359451293945312, 14.974353790283203, 13.653594970703125, 1.4853172302246094, 4.743003845214844, -2.0604324340820312, 34.400787353515625, 39.30464172363281, 11.96444320678711, 25.289642333984375, 33.432952880859375, 14.920160293579102, 23.8414306640625, -19.450096130371094, -9.662622451782227, 27.510486602783203, 11.384078979492188, 24.384201049804688, 6.5426483154296875, 69.5491943359375, 25.32115936279297, 21.007705688476562, 33.49738311767578, 54.99884033203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000138.npy"}
|
|
{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 20.38736343383789, "std": 18.344341278076172, "min": -6.552803039550781, "p10": -0.2380430221557616, "median": 16.26479148864746, "p90": 44.9964469909668, "max": 72.0260009765625, "pos_frac": 0.875, "sample": [-3.5448455810546875, 28.99005889892578, 28.669097900390625, 36.99430847167969, -0.7320632934570312, 28.961395263671875, 44.12449645996094, 3.175830841064453, 16.341968536376953, 35.82795715332031, -5.1295013427734375, 23.069679260253906, 4.272224426269531, 62.145263671875, 45.370140075683594, 27.053937911987305, 36.84132385253906, 17.529518127441406, 15.057891845703125, 12.124549865722656, 52.441184997558594, 19.419300079345703, -0.8468856811523438, 25.43292236328125, 16.18761444091797, 3.5305919647216797, 40.74041748046875, 30.824310302734375, 3.8651657104492188, 24.548934936523438, 39.48115539550781, -0.2877063751220703, 14.422782897949219, 21.99353790283203, 17.061111450195312, 17.22711181640625, 11.68511962890625, 0.2705707550048828, 0.6375465393066406, 5.414207458496094, 9.393997192382812, 0.3672065734863281, 6.699394226074219, 36.059967041015625, 14.54940414428711, 57.707611083984375, 31.73448944091797, -6.552803039550781, 8.366584777832031, 3.0002593994140625, 32.52429962158203, -1.079132080078125, 15.50025749206543, 14.323341369628906, 72.0260009765625, 11.891643524169922, 64.08726501464844, 11.793914794921875, 33.02308654785156, -0.122161865234375, 48.260833740234375, 15.495231628417969, 7.111686706542969, 17.436668395996094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000139.npy"}
|
|
{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 19.300575256347656, "std": 19.64964485168457, "min": -16.71148681640625, "p10": -3.006729698181151, "median": 15.208828926086426, "p90": 44.81263732910156, "max": 68.39668273925781, "pos_frac": 0.859375, "sample": [3.270284652709961, 37.2176399230957, 66.89739990234375, -5.511497497558594, -1.2245025634765625, 30.784042358398438, 37.85101318359375, 49.72243881225586, 24.052682876586914, 42.5933837890625, 37.76409912109375, 16.69546127319336, 30.595230102539062, 6.244192123413086, 22.753189086914062, 15.945215225219727, 51.67810821533203, 3.411914825439453, -1.8921661376953125, 23.59178924560547, 6.523139953613281, 23.360572814941406, 22.74365234375, 33.26471710205078, 1.6998672485351562, 5.775360107421875, 45.18928527832031, 9.68841552734375, 6.2513885498046875, 38.69191360473633, 9.492351531982422, 6.553825378417969, 7.8579559326171875, 37.706787109375, 2.7270660400390625, 14.472442626953125, 37.31025314331055, 12.028274536132812, 5.053466796875, 24.700729370117188, 30.965248107910156, -5.442340850830078, 43.93379211425781, -16.71148681640625, -4.836967468261719, 22.09079360961914, 12.095504760742188, 28.36199951171875, 54.960304260253906, 3.5244979858398438, -3.4843997955322266, 32.3988037109375, 0.4922466278076172, 29.113800048828125, -14.639900207519531, 10.961437225341797, -5.8647918701171875, 0.3572235107421875, 68.39668273925781, 53.57978820800781, 3.7608718872070312, 11.364421844482422, 3.0160064697265625, 33.31182861328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000140.npy"}
|
|
{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 19.22583770751953, "std": 17.37042808532715, "min": -18.364532470703125, "p10": -1.4518745422363255, "median": 18.982088088989258, "p90": 45.20720291137695, "max": 55.35432434082031, "pos_frac": 0.890625, "sample": [51.14314270019531, 16.406333923339844, 14.188129425048828, 32.649513244628906, 49.70185852050781, 4.309135437011719, 24.847015380859375, 22.973209381103516, 14.98297119140625, 1.8945121765136719, 14.267780303955078, 45.775665283203125, -18.364532470703125, 17.90656280517578, -2.5840682983398438, 21.689041137695312, 3.2019901275634766, -4.436553955078125, 55.35432434082031, -14.18817138671875, 31.149749755859375, 42.99015808105469, 20.057613372802734, 33.01353454589844, 38.33428955078125, 23.384201049804688, 21.041778564453125, 14.503059387207031, 12.854339599609375, 14.430137634277344, 26.046424865722656, 5.843982696533203, 4.197292327880859, 38.58856201171875, 50.779052734375, 6.429319381713867, 27.207351684570312, 7.1212921142578125, -7.63813591003418, 6.977022171020508, 1.189910888671875, 25.414873123168945, 7.792167663574219, 50.06648254394531, 6.90101432800293, 33.565879821777344, 27.33197784423828, 8.8935546875, 5.81126594543457, -12.627593994140625, 14.393508911132812, 31.72307586669922, 12.280509948730469, 4.868721008300781, 23.040283203125, 35.747154235839844, 44.898223876953125, 28.04338836669922, 3.35992431640625, 45.339622497558594, 25.639389038085938, 20.42901611328125, -6.071632385253906, 23.39401626586914], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000141.npy"}
|
|
{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 19.969118118286133, "std": 22.05319595336914, "min": -42.372589111328125, "p10": -2.91082649230957, "median": 17.22383403778076, "p90": 52.73888015747071, "max": 68.15338134765625, "pos_frac": 0.875, "sample": [-2.6151161193847656, 58.795745849609375, 2.7047863006591797, 9.82394027709961, 5.397380828857422, 32.53507995605469, -19.46999740600586, 5.923982620239258, -11.8570556640625, 10.628849029541016, 30.436019897460938, 33.821502685546875, 51.374122619628906, 11.353107452392578, 18.19025993347168, 39.84822082519531, 1.0579948425292969, -42.372589111328125, -3.4050750732421875, 19.505630493164062, 29.33995819091797, 20.82709503173828, 7.883808135986328, 47.210693359375, 53.32377624511719, 14.35209846496582, 19.779457092285156, 26.22041130065918, 30.06043243408203, -3.0531558990478516, 19.17041015625, 50.76123809814453, 31.31207275390625, 25.45032501220703, 0.5121192932128906, 56.99659729003906, 58.89039611816406, 16.257408142089844, 2.7511959075927734, 3.970733642578125, 1.8459320068359375, 7.145938873291016, 0.6776657104492188, 45.1683349609375, 21.294296264648438, 50.92462158203125, 4.554975509643555, -3.0375595092773438, 10.338096618652344, 12.061012268066406, 25.256000518798828, 0.402099609375, 9.12834358215332, -6.7038116455078125, 68.15338134765625, 58.26654815673828, 9.245506286621094, 54.622962951660156, 3.9746780395507812, 6.301965713500977, 32.951393127441406, 43.57183074951172, 20.654951095581055, 37.53056335449219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000142.npy"}
|
|
{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 15.336509704589844, "std": 18.686594009399414, "min": -24.52129364013672, "p10": -6.495886993408202, "median": 13.567496299743652, "p90": 37.572973632812506, "max": 77.52992248535156, "pos_frac": 0.765625, "sample": [16.862640380859375, 2.8096466064453125, 20.01862335205078, 20.990215301513672, 49.58038330078125, 11.652427673339844, -2.7193470001220703, 34.428871154785156, 42.81099319458008, 5.432134628295898, 28.5020751953125, -24.52129364013672, -7.884834289550781, 21.62896728515625, 24.010665893554688, -1.7915477752685547, -8.641702651977539, 3.489957809448242, 31.908432006835938, -6.772735595703125, 9.081764221191406, 39.82331848144531, -3.7661094665527344, 23.2249755859375, 17.3917236328125, -11.194469451904297, 20.49639892578125, 33.1949462890625, 15.542583465576172, 27.72604751586914, -5.849906921386719, -7.906667709350586, 5.901588439941406, 6.5676727294921875, 36.947731018066406, 16.6824951171875, 12.382644653320312, 13.895235061645508, 18.768585205078125, 77.52992248535156, 10.614410400390625, -1.9022903442382812, 11.362884521484375, 37.84093475341797, 0.5821609497070312, 32.57756805419922, 32.400917053222656, 40.946800231933594, -1.1579513549804688, 25.89300537109375, -2.357452392578125, 58.79095458984375, 13.239757537841797, 22.16596221923828, 22.01519012451172, 12.423261642456055, 24.398284912109375, 9.753768920898438, 30.341957092285156, -23.436752319335938, 5.571531295776367, 11.34913444519043, -2.2773361206054688, 2.1648712158203125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000143.npy"}
|
|
{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 19.694934844970703, "std": 16.19609832763672, "min": -9.603065490722656, "p10": -0.5643539428710935, "median": 19.230892181396484, "p90": 39.355914306640635, "max": 72.08677673339844, "pos_frac": 0.875, "sample": [-1.392181396484375, 23.100021362304688, 10.618330001831055, 43.58427429199219, -9.603065490722656, 11.99725341796875, 28.04998207092285, 28.048019409179688, 23.41765594482422, 25.315906524658203, 36.91187286376953, 24.682384490966797, 18.67083740234375, 22.766647338867188, 28.979442596435547, 31.02092742919922, 14.83646011352539, 25.88074493408203, 40.37165069580078, 48.994789123535156, 35.602264404296875, 44.414100646972656, 3.4957637786865234, -4.846221923828125, 24.719818115234375, -0.297698974609375, 19.408958435058594, 5.243339538574219, 26.022680282592773, 3.6939945220947266, -8.360910415649414, 24.780790328979492, 29.6683292388916, 31.709373474121094, -2.4466781616210938, 31.85382080078125, 12.1932373046875, 10.120098114013672, 11.480045318603516, 18.512670516967773, 5.5839080810546875, 14.97998046875, 58.088966369628906, 24.30845069885254, 11.754539489746094, 22.92742156982422, -8.00942611694336, 11.273307800292969, 25.856491088867188, 36.985862731933594, 24.87757110595703, 48.562103271484375, 2.750396728515625, 5.8446044921875, 13.811790466308594, 2.02569580078125, 72.08677673339844, 11.38088607788086, 10.635515213012695, 14.055435180664062, 19.052825927734375, 11.920028686523438, 27.18157958984375, -0.6786346435546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000144.npy"}
|
|
{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 19.97324562072754, "std": 18.227842330932617, "min": -10.279708862304688, "p10": 0.8428533554077149, "median": 14.21449089050293, "p90": 45.570151901245126, "max": 77.8551025390625, "pos_frac": 0.90625, "sample": [11.736526489257812, 4.117156982421875, 21.306381225585938, 77.8551025390625, 30.242923736572266, 51.75091552734375, 38.84130859375, 12.827312469482422, -10.279708862304688, 21.268596649169922, 4.748199462890625, 9.746795654296875, 30.309860229492188, 7.141864776611328, 19.656543731689453, 22.11675262451172, 41.673789978027344, 12.451013565063477, 14.00802993774414, -1.9667720794677734, 0.8084621429443359, 9.885902404785156, 9.708511352539062, 61.42003631591797, 14.759597778320312, 2.7040023803710938, 10.196632385253906, 0.9230995178222656, 11.445465087890625, 43.25555419921875, 13.984277725219727, 14.361602783203125, 32.954986572265625, -4.572395324707031, -0.1134490966796875, 31.137786865234375, 4.639556884765625, 4.988447189331055, 20.949623107910156, -3.0992050170898438, 18.153247833251953, 14.067378997802734, 50.28887939453125, 1.9002723693847656, 35.732276916503906, 57.28694152832031, 21.039161682128906, -5.468746185302734, 24.03913116455078, 48.414527893066406, 5.384807586669922, 13.442449569702148, 37.578826904296875, 33.48576354980469, 24.05782699584961, 3.138570785522461, 9.068073272705078, 16.562824249267578, 12.6495361328125, 31.50177764892578, 46.5621223449707, 33.63260269165039, 3.298490524291992, 42.57989501953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000145.npy"}
|
|
{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 16.486042022705078, "std": 17.46185874938965, "min": -13.014801025390625, "p10": -3.8581787109375, "median": 14.199779510498047, "p90": 40.97269744873047, "max": 68.16824340820312, "pos_frac": 0.8125, "sample": [17.223129272460938, 8.700721740722656, 48.31214904785156, -10.572547912597656, 11.325847625732422, 0.422332763671875, 37.23325729370117, 13.925888061523438, 20.528076171875, 13.233993530273438, 43.12376403808594, 8.677864074707031, 6.535037994384766, 21.830360412597656, 18.14215850830078, 18.077369689941406, 20.368804931640625, 1.6453323364257812, 4.049201965332031, 0.38187217712402344, -0.198028564453125, 12.748603820800781, 27.542938232421875, 46.358306884765625, -6.6781463623046875, 21.006759643554688, 23.63055419921875, -13.014801025390625, 13.97650146484375, 31.310791015625, -3.1391525268554688, 13.540328979492188, 9.51649284362793, 22.578018188476562, 68.16824340820312, -7.417869567871094, 16.67359161376953, 7.417211532592773, 21.58893585205078, 41.006439208984375, 15.77318000793457, 40.89396667480469, 56.244075775146484, 15.203800201416016, 18.873023986816406, 14.423057556152344, 39.18010711669922, -11.290130615234375, 7.6788330078125, -1.7824554443359375, 32.183380126953125, 33.68006134033203, 24.61359405517578, 5.197771072387695, 3.6429824829101562, 4.7203826904296875, 8.746282577514648, -3.8776702880859375, 27.9747314453125, -1.8303375244140625, 39.12068176269531, -4.087892532348633, 43.85768127441406, -3.8126983642578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000146.npy"}
|
|
{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 20.196962356567383, "std": 18.97796630859375, "min": -12.436393737792969, "p10": 0.19122486114502024, "median": 15.527737617492676, "p90": 45.74578704833985, "max": 72.00064086914062, "pos_frac": 0.890625, "sample": [61.0794677734375, -11.696033477783203, 24.23553466796875, 40.253326416015625, 10.680023193359375, 6.030391693115234, 24.61065673828125, 42.79480743408203, 11.18463134765625, 2.1943588256835938, 5.893123626708984, 16.6268310546875, 16.676095962524414, 25.794898986816406, 19.530155181884766, 59.833763122558594, 31.508182525634766, 15.638647079467773, 5.695220947265625, 38.19615936279297, 53.00959396362305, 6.3210296630859375, 31.361583709716797, 7.473894119262695, 72.00064086914062, 10.528244018554688, 56.19264221191406, 33.3751106262207, 14.297599792480469, 30.16997528076172, 13.175312042236328, -0.49514007568359375, 0.9139156341552734, 45.05389404296875, 24.34368133544922, 1.397918701171875, 22.925251007080078, 70.9691162109375, 13.410711288452148, 9.632938385009766, 27.179115295410156, 7.14483642578125, 3.6775131225585938, 7.6988372802734375, 12.261825561523438, 24.960548400878906, 5.4676361083984375, 16.728260040283203, 29.589370727539062, -1.3377151489257812, -0.8939361572265625, 17.891895294189453, 15.416828155517578, 24.84344482421875, -0.118499755859375, 11.79891586303711, 46.04231262207031, 15.148796081542969, 18.891754150390625, -9.112884521484375, 42.062744140625, -12.436393737792969, 14.600082397460938, 12.2821044921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000147.npy"}
|
|
{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 16.166454315185547, "std": 16.389705657958984, "min": -15.479751586914062, "p10": 0.42838096618652377, "median": 11.965677261352539, "p90": 40.41294326782227, "max": 55.543212890625, "pos_frac": 0.90625, "sample": [0.7312583923339844, 16.966552734375, 2.72265625, 46.824005126953125, 21.12390899658203, 15.466133117675781, 37.491798400878906, 13.679401397705078, -3.8210525512695312, 54.754554748535156, 11.415626525878906, 36.76849365234375, 15.470394134521484, 23.96526336669922, 31.84325408935547, 40.88789367675781, 4.771402359008789, -4.382049560546875, 0.29857635498046875, -4.4742279052734375, 31.0682373046875, 23.776290893554688, 4.579202651977539, 22.123992919921875, 41.857730865478516, 13.927703857421875, 21.45368194580078, 8.368721008300781, 48.8790168762207, -2.3347034454345703, 39.104087829589844, 6.932861328125, 8.748617172241211, 13.918853759765625, 1.3931236267089844, 9.303583145141602, 2.0929107666015625, 14.149993896484375, 27.467788696289062, 6.661975860595703, 39.304725646972656, 11.134490966796875, -10.585418701171875, 4.894012451171875, 10.182062149047852, 8.413116455078125, 20.102203369140625, 5.283149719238281, 15.725807189941406, 2.5824050903320312, -15.479751586914062, 13.306648254394531, 14.502523422241211, 7.816801071166992, 12.515727996826172, 3.326211929321289, 10.503799438476562, 53.9781494140625, 55.543212890625, 3.701976776123047, 6.374021530151367, 9.492729187011719, 37.32911682128906, 8.727855682373047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000148.npy"}
|
|
{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 20.686452865600586, "std": 16.941299438476562, "min": -9.9119873046875, "p10": 0.33076858520507846, "median": 20.37682342529297, "p90": 41.4132667541504, "max": 68.23854064941406, "pos_frac": 0.90625, "sample": [28.269813537597656, 21.875473022460938, 54.19309997558594, 12.066181182861328, 28.725860595703125, 9.352859497070312, 10.275625228881836, -1.3923530578613281, -2.1323013305664062, 22.599048614501953, 7.606607437133789, 11.921894073486328, 24.25967788696289, 23.086055755615234, 5.302845001220703, -8.149375915527344, 47.16265869140625, 25.299560546875, 4.8352203369140625, 26.89435577392578, 12.300384521484375, 39.37914276123047, 35.51850891113281, 7.406393051147461, 8.980081558227539, 24.549896240234375, 11.276453018188477, 36.05241394042969, 33.0504035949707, 34.02122497558594, 36.337890625, 0.20153045654296875, 68.23854064941406, 6.59416389465332, 35.847496032714844, 18.561561584472656, 56.675750732421875, 10.016502380371094, 2.924121856689453, 30.898521423339844, 15.454700469970703, 35.01264572143555, 16.483285903930664, 23.735979080200195, 0.63232421875, 30.086729049682617, 12.176822662353516, 32.65692138671875, 10.177703857421875, -0.2676239013671875, 20.884765625, 23.140037536621094, -1.7550048828125, 44.534461975097656, 13.290586471557617, 19.868881225585938, 5.628772735595703, 23.35576820373535, 42.2850341796875, 64.464599609375, 5.131965637207031, 8.42905044555664, 27.552837371826172, -9.9119873046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000149.npy"}
|
|
{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 19.51645278930664, "std": 19.830421447753906, "min": -19.64849090576172, "p10": 0.19786605834961019, "median": 15.738443374633789, "p90": 41.80986480712892, "max": 83.6280517578125, "pos_frac": 0.890625, "sample": [54.356597900390625, 72.18821716308594, 23.20549774169922, 7.80645751953125, 29.021873474121094, 1.0222320556640625, 22.41490936279297, 17.06012535095215, -19.64849090576172, 4.87451171875, 83.6280517578125, -3.3860034942626953, 18.762088775634766, 12.603771209716797, 7.147876739501953, 31.627883911132812, 12.114517211914062, 25.5294132232666, 20.300506591796875, 19.953475952148438, 15.62820816040039, 34.19017791748047, 61.148704528808594, 13.755050659179688, 35.67250061035156, 23.65575408935547, 42.92877197265625, 32.01708221435547, 11.385522842407227, 33.58829116821289, 4.82659912109375, -11.52197265625, 23.061683654785156, 36.755462646484375, 3.104339599609375, 3.681417465209961, 43.716278076171875, 12.425849914550781, 26.140823364257812, 3.7209625244140625, 25.464889526367188, 18.159751892089844, 3.2785682678222656, -0.15543365478515625, 39.19908142089844, -2.8770217895507812, 7.77252197265625, 21.586599349975586, 13.463783264160156, 11.678924560546875, 10.17791748046875, 13.713886260986328, 22.870880126953125, 2.5167770385742188, 30.19757652282715, 76.07391357421875, 13.28957748413086, 5.019260406494141, -4.320762634277344, 30.50183868408203, 15.848678588867188, 2.4266815185546875, -6.4892120361328125, 9.189277648925781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000150.npy"}
|
|
{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 21.386999130249023, "std": 18.111618041992188, "min": -13.894441604614258, "p10": 1.0023443222045907, "median": 17.430262565612793, "p90": 44.83815078735352, "max": 72.49441528320312, "pos_frac": 0.90625, "sample": [-8.92413330078125, 72.49441528320312, 27.967117309570312, 8.735389709472656, 2.48541259765625, 5.467498779296875, 25.919700622558594, 26.55435562133789, 24.402603149414062, 14.474761962890625, 45.239044189453125, -0.7842922210693359, -3.2748489379882812, 32.321022033691406, 12.31744384765625, 2.7325286865234375, 13.377132415771484, 15.029399871826172, 16.997665405273438, 20.386703491210938, 67.28134155273438, 32.60774612426758, 20.92919921875, 16.658740997314453, -6.250328063964844, 56.30420684814453, 13.479026794433594, 15.385955810546875, -1.6952590942382812, 19.832427978515625, 21.62196922302246, 12.03996467590332, 11.009403228759766, 14.478841781616211, 9.086563110351562, 53.034942626953125, -13.894441604614258, 41.35797119140625, 15.608757019042969, 24.0875244140625, 6.806257247924805, 24.651412963867188, 17.496124267578125, 53.56281280517578, 19.634017944335938, 41.34745788574219, 38.551971435546875, 5.682764053344727, 34.87669372558594, 31.813446044921875, 23.077438354492188, 8.7288818359375, 4.410472869873047, 40.435707092285156, 14.997306823730469, 1.9045238494873047, 50.992515563964844, 43.902732849121094, 36.73309326171875, 12.681884765625, 0.6156959533691406, 21.097501754760742, 17.36440086364746, 40.519309997558594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000151.npy"}
|
|
{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 22.185049057006836, "std": 19.241355895996094, "min": -18.010498046875, "p10": 2.849803924560547, "median": 17.72074317932129, "p90": 50.856134033203126, "max": 80.96493530273438, "pos_frac": 0.953125, "sample": [19.620132446289062, 9.345932006835938, 42.68840026855469, 50.39814758300781, 17.669940948486328, 36.303871154785156, 25.60085678100586, 24.25885009765625, 63.68925476074219, 15.178291320800781, 41.358787536621094, 36.67600631713867, 11.853439331054688, 11.119331359863281, 25.345863342285156, 60.012664794921875, 29.977638244628906, 9.696544647216797, 22.049198150634766, 8.600990295410156, 29.166664123535156, 12.843421936035156, -11.442331314086914, 16.939430236816406, 9.356147766113281, 65.42756652832031, 29.126739501953125, 54.2864990234375, 17.77154541015625, 2.9757080078125, 18.715484619140625, -18.010498046875, 26.311458587646484, 2.7958450317382812, 4.596408843994141, 23.933202743530273, 16.95465087890625, 29.266876220703125, 16.189247131347656, 9.028839111328125, 80.96493530273438, 9.369583129882812, 18.5029296875, 16.944580078125, 15.243854522705078, 5.32781982421875, 65.36955261230469, 9.750837326049805, 0.3111610412597656, 30.009552001953125, 19.930131912231445, 14.129936218261719, 7.811126708984375, 6.984382629394531, 51.05241394042969, 0.034488677978515625, 27.09967041015625, -4.3377685546875, 12.71868896484375, 38.204933166503906, 9.1651611328125, 2.397979736328125, 32.59539794921875, 32.58476257324219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000152.npy"}
|
|
{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 18.822778701782227, "std": 19.090059280395508, "min": -16.6564998626709, "p10": -0.5571786880493163, "median": 16.879642486572266, "p90": 44.42392311096193, "max": 78.7630615234375, "pos_frac": 0.875, "sample": [30.52532386779785, 18.591873168945312, 10.178192138671875, 3.3981857299804688, 7.353588104248047, 27.837799072265625, 2.769521713256836, 27.1343994140625, 3.1211700439453125, 68.54142761230469, 23.4266357421875, 23.163597106933594, 3.4764976501464844, 25.418933868408203, 48.39332580566406, -12.26513671875, 35.83148193359375, 4.18235969543457, 20.731735229492188, 38.3408203125, 22.3990478515625, 78.7630615234375, 13.127738952636719, 24.36564826965332, 18.671340942382812, 58.24431610107422, 3.5827178955078125, 5.797702789306641, 49.00077819824219, 22.864532470703125, 46.26933670043945, 4.865104675292969, 34.10646057128906, 3.305753707885742, 0.4634361267089844, 2.7691192626953125, 15.167411804199219, 24.21489715576172, 1.7287979125976562, -4.204925537109375, 9.69342041015625, 11.265914916992188, 4.1464385986328125, 34.03131103515625, 7.733386993408203, -0.3629112243652344, -16.6564998626709, 14.409088134765625, 28.992034912109375, -4.1346893310546875, 28.843643188476562, 52.71052551269531, 29.30889129638672, -2.7121410369873047, 26.010339736938477, -0.6404361724853516, 26.8758544921875, 12.712032318115234, 23.573951721191406, 40.117958068847656, 3.495159149169922, -3.8961715698242188, 5.201011657714844, 38.2857551574707], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000153.npy"}
|
|
{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 18.761655807495117, "std": 19.334016799926758, "min": -19.064937591552734, "p10": -6.254926300048828, "median": 18.218921661376953, "p90": 39.85145568847656, "max": 66.89236450195312, "pos_frac": 0.828125, "sample": [2.320425033569336, 1.6690216064453125, -17.302459716796875, 0.017217636108398438, 66.89236450195312, 23.77165985107422, 7.106340408325195, 18.274063110351562, 37.124786376953125, -6.639533996582031, 11.709144592285156, 55.54548645019531, -9.207595825195312, 20.383773803710938, 29.036540985107422, 23.77981185913086, -1.608469009399414, -8.337417602539062, 15.619474411010742, 0.8951740264892578, 29.132301330566406, 29.788654327392578, 1.0852794647216797, 7.571804046630859, 7.9055023193359375, 36.59950256347656, 18.163780212402344, 37.576385498046875, 32.624778747558594, -6.4894866943359375, 8.279243469238281, 49.780006408691406, -3.9092636108398438, 40.1607666015625, -9.076993942260742, 39.129730224609375, 59.2406005859375, 50.8597412109375, 7.9613037109375, 38.3704833984375, 35.63874053955078, 30.3193359375, 6.286766052246094, 23.527847290039062, 31.412002563476562, 14.12984848022461, 15.7589111328125, 21.47101402282715, -0.8032608032226562, 27.8863525390625, 30.86016845703125, 18.955093383789062, -5.707618713378906, 13.72314453125, 16.654525756835938, -19.064937591552734, 32.7211799621582, 28.066452026367188, 25.529403686523438, 9.227005004882812, 9.596221923828125, 7.592218399047852, 21.66387939453125, 59.497711181640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000154.npy"}
|
|
{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 19.738880157470703, "std": 17.74778938293457, "min": -24.239852905273438, "p10": 0.862902641296387, "median": 17.449012756347656, "p90": 36.44425735473633, "max": 73.8837661743164, "pos_frac": 0.921875, "sample": [11.523017883300781, 7.892555236816406, 3.186433792114258, -2.0841522216796875, 24.050342559814453, 1.4467430114746094, 33.48439407348633, 0.714080810546875, 16.647876739501953, 0.11250114440917969, 3.5713272094726562, 11.135997772216797, 14.483131408691406, 9.4658203125, 20.25176239013672, 25.51762580871582, 30.990528106689453, 28.485862731933594, 25.82483673095703, 14.162612915039062, 28.572288513183594, 15.761077880859375, 25.73107147216797, 33.41398620605469, 61.93503189086914, 6.355264663696289, 33.937477111816406, 34.551673889160156, 12.289432525634766, 19.63079833984375, 6.899335861206055, 1.8576431274414062, 17.382205963134766, 23.44329833984375, 47.833526611328125, -24.239852905273438, 55.63807678222656, 8.175270080566406, 26.407188415527344, 5.3546295166015625, 9.53370475769043, 32.0196533203125, 36.7635498046875, 14.4842529296875, 73.8837661743164, 20.56487274169922, 65.50906372070312, 30.61968994140625, -12.111526489257812, 14.592803955078125, 13.011398315429688, 35.699241638183594, 17.515819549560547, 30.85387420654297, 19.28342056274414, -5.804100036621094, 30.71774673461914, 38.03009796142578, 21.09654998779297, 1.210153579711914, 34.2202033996582, 8.405059814453125, -1.4334907531738281, 12.829856872558594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000155.npy"}
|
|
{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 15.239997863769531, "std": 20.016712188720703, "min": -37.64312744140625, "p10": -7.2251132965087885, "median": 13.131226539611816, "p90": 43.13808174133302, "max": 64.48558044433594, "pos_frac": 0.796875, "sample": [35.74999237060547, 13.351806640625, 45.88493347167969, 47.998779296875, 29.900665283203125, 12.303878784179688, -18.384811401367188, 1.1488265991210938, 9.29510498046875, 11.30328369140625, -12.946884155273438, 46.23855972290039, 10.875114440917969, -0.9014663696289062, -6.369091033935547, -24.78331756591797, 24.612377166748047, 40.440025329589844, 48.965606689453125, 16.920379638671875, 9.4453125, 39.13948059082031, 1.98095703125, 27.18840789794922, 10.339759826660156, 3.3036117553710938, 5.229059219360352, 17.70044708251953, 64.48558044433594, 9.840309143066406, 53.53910827636719, 37.45586395263672, 44.29439163208008, 16.0343017578125, -7.59197998046875, 0.7997570037841797, 11.451026916503906, -8.033218383789062, -37.64312744140625, -2.5310134887695312, 21.57654571533203, -1.3680648803710938, 4.38177490234375, 26.424896240234375, 22.832229614257812, 5.6504364013671875, 21.699838638305664, 40.180564880371094, 15.074462890625, 12.910646438598633, 0.11267852783203125, 20.383033752441406, 12.429237365722656, 24.082231521606445, -24.98992156982422, -2.8286590576171875, 30.08795166015625, 27.77252960205078, 20.85950469970703, 12.730117797851562, 13.396976470947266, 14.935798645019531, -4.183727264404297, 33.17695236206055], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000156.npy"}
|
|
{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 21.57200050354004, "std": 19.284809112548828, "min": -9.315010070800781, "p10": -0.7855901718139645, "median": 18.865703582763672, "p90": 45.364188766479494, "max": 76.09053039550781, "pos_frac": 0.875, "sample": [37.70735168457031, 34.64685821533203, 24.817787170410156, 46.33726501464844, 17.048316955566406, 3.3926048278808594, 11.470443725585938, 45.84323501586914, 8.39678955078125, 42.48442077636719, 22.95870590209961, 44.24641418457031, 17.214658737182617, 31.019668579101562, 33.26971435546875, 27.260894775390625, 31.20885467529297, 21.606443405151367, 26.349952697753906, 27.69768524169922, 20.36486053466797, -3.1743927001953125, 31.776634216308594, -0.9278717041015625, 32.040374755859375, 24.999801635742188, -0.9478073120117188, 17.366546630859375, 6.871429443359375, 49.044349670410156, -6.12347412109375, 12.408445358276367, 13.527114868164062, 1.8458728790283203, 7.384033203125, 29.21766471862793, 38.020416259765625, 10.53399658203125, 33.91529846191406, -0.4535999298095703, -1.9526710510253906, 64.55702209472656, 7.750143051147461, 6.966789245605469, 2.2879676818847656, 62.18735122680664, 37.23704528808594, -6.59412956237793, 71.06258392333984, 8.260677337646484, 36.44751739501953, 5.9443511962890625, 1.4263496398925781, 15.525993347167969, 36.47038269042969, 28.55010986328125, 76.09053039550781, 25.020233154296875, 12.645286560058594, -9.315010070800781, 17.13922119140625, 5.855751037597656, 0.677734375, 3.6990585327148438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000157.npy"}
|
|
{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 21.917728424072266, "std": 21.43829917907715, "min": -3.793283462524414, "p10": -0.5671649932861323, "median": 17.011685371398926, "p90": 47.815019226074234, "max": 77.486328125, "pos_frac": 0.875, "sample": [7.046031951904297, 1.0474433898925781, 18.72382164001465, 15.800445556640625, 18.222925186157227, 12.726930618286133, 5.333473205566406, 4.127265930175781, 24.82436752319336, 26.216144561767578, 41.33935546875, -2.216217041015625, 43.1796875, 34.563297271728516, -0.758392333984375, -3.3309288024902344, 1.1276626586914062, 33.59893035888672, 6.941440582275391, 3.799407958984375, 71.025390625, -0.12096786499023438, 38.02294921875, 8.070648193359375, 49.80158996582031, 5.8312225341796875, 37.60417938232422, 9.161510467529297, 6.96160888671875, 3.824779510498047, 33.900447845458984, 29.01848602294922, 5.759956359863281, 37.8873291015625, -0.9044532775878906, -1.7020263671875, 57.09925842285156, 37.0504150390625, 1.9552383422851562, 71.93861389160156, 72.90760803222656, 7.335540771484375, 9.069622039794922, 77.486328125, 2.7817764282226562, 22.496841430664062, 0.5932044982910156, 27.149383544921875, 9.872360229492188, 12.453926086425781, 76.30975341796875, 40.148704528808594, 31.171409606933594, 32.07734680175781, -1.630777359008789, 37.37318420410156, 13.598312377929688, -3.793283462524414, 24.975631713867188, 27.820152282714844, 32.826202392578125, 25.509536743164062, 25.3414306640625, 4.361167907714844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000158.npy"}
|
|
{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 19.63745880126953, "std": 20.59627342224121, "min": -10.096832275390625, "p10": -5.359132575988769, "median": 16.219131469726562, "p90": 42.122381973266606, "max": 87.7291030883789, "pos_frac": 0.859375, "sample": [10.619674682617188, 8.324165344238281, 40.12809753417969, -7.698600769042969, 26.612075805664062, 31.167327880859375, -9.834419250488281, -9.519214630126953, 36.90315246582031, 22.17352294921875, 18.996856689453125, 5.477333068847656, 7.316944122314453, 0.44384765625, 16.19640350341797, 13.946273803710938, 25.015594482421875, 33.319759368896484, 47.92442321777344, 40.353885650634766, 87.7291030883789, 4.317583084106445, -10.096832275390625, 4.09027099609375, 2.9163150787353516, -0.8993377685546875, 10.525962829589844, -5.011077880859375, 9.09222412109375, 16.8623046875, 8.021148681640625, 41.81035614013672, -7.061470031738281, 12.113285064697266, 10.35723876953125, -5.508298873901367, 22.925891876220703, 15.424644470214844, 7.7399444580078125, 57.502105712890625, 38.70245361328125, 41.38804626464844, 18.46219825744629, 25.60547637939453, 9.020164489746094, 75.35848999023438, 6.400510787963867, 0.2193756103515625, 3.297882080078125, 41.35125732421875, 44.68316650390625, 16.241859436035156, 18.107070922851562, -8.107818603515625, 21.533477783203125, 32.87591552734375, 36.82384490966797, 42.256107330322266, 12.171211242675781, 62.15309143066406, 32.222267150878906, 18.108272552490234, 2.3001556396484375, 24.904434204101562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000159.npy"}
|
|
{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 19.808996200561523, "std": 22.115102767944336, "min": -16.774818420410156, "p10": -4.5050712585449215, "median": 15.291259765625, "p90": 48.769586944580084, "max": 89.05693054199219, "pos_frac": 0.828125, "sample": [29.175262451171875, 8.954660415649414, 41.27311706542969, 25.877716064453125, 2.3206214904785156, -6.844490051269531, 13.176536560058594, 33.04322814941406, 40.483367919921875, -4.784505844116211, -16.774818420410156, 7.9530487060546875, 47.34766387939453, 51.02055358886719, 32.3309326171875, -2.2445068359375, 1.2211761474609375, 19.17005157470703, 5.0418548583984375, 49.37898254394531, 68.53126525878906, -5.9022064208984375, 31.064468383789062, -4.540824890136719, 24.509803771972656, 4.5968475341796875, 15.908340454101562, 10.815078735351562, 83.64114379882812, 24.587921142578125, 6.978565216064453, 20.794294357299805, 14.913299560546875, 51.34388732910156, 2.5916671752929688, -4.4216461181640625, 4.379402160644531, 38.510345458984375, 64.33984375, 89.05693054199219, 25.23598861694336, 25.625869750976562, -6.0334320068359375, 44.685142517089844, 30.525604248046875, 5.9208221435546875, 1.0087528228759766, 14.189796447753906, 4.9766082763671875, 24.496437072753906, 14.155281066894531, 16.014556884765625, 15.669219970703125, -4.052295684814453, 6.051300048828125, 26.919029235839844, 3.2236461639404297, -0.5265827178955078, -6.687583923339844, 31.248199462890625, 1.7874069213867188, 38.483726501464844, 13.369308471679688, 22.67005157470703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000160.npy"}
|
|
{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 20.026674270629883, "std": 19.912761688232422, "min": -12.71551513671875, "p10": -5.371806907653808, "median": 17.887900352478027, "p90": 45.76089935302735, "max": 83.07313537597656, "pos_frac": 0.84375, "sample": [25.281509399414062, 46.69500732421875, -3.936859130859375, 59.01018524169922, 43.88462829589844, -6.671638488769531, -12.71551513671875, -5.95574951171875, 24.890792846679688, -5.45756721496582, 13.669204711914062, 40.283111572265625, 25.5118408203125, -7.64067268371582, 6.3605499267578125, 11.935897827148438, 44.25572204589844, 22.67740821838379, 27.332138061523438, 57.08326721191406, 18.14442253112793, 14.83224105834961, 21.77322006225586, -5.171699523925781, 16.63500213623047, 2.807056427001953, 24.123573303222656, 5.987342834472656, 63.307411193847656, 83.07313537597656, 25.576919555664062, 10.611936569213867, 14.176132202148438, -1.8030662536621094, 39.004150390625, 30.043479919433594, 0.075775146484375, -6.050222396850586, 6.043052673339844, 24.188278198242188, 24.039840698242188, 9.681663513183594, -10.5760498046875, 17.631378173828125, 29.232894897460938, 22.31696319580078, 47.48332214355469, 39.067665100097656, 5.568666458129883, 15.007186889648438, 35.23094177246094, 3.00445556640625, 0.8494720458984375, 46.405975341796875, 7.546012878417969, 8.378021240234375, 12.582664489746094, 27.614280700683594, 41.1926383972168, 0.41518211364746094, 9.491674423217773, 34.693763732910156, 34.37348556518555, 26.60370635986328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000161.npy"}
|
|
{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 19.227447509765625, "std": 20.753376007080078, "min": -13.53802490234375, "p10": -1.3936769485473621, "median": 15.647308349609375, "p90": 41.681396102905275, "max": 114.13113403320312, "pos_frac": 0.875, "sample": [-5.331031799316406, 13.886913299560547, 19.894256591796875, 27.435882568359375, 0.4426422119140625, 49.0672607421875, 2.334867477416992, 37.295196533203125, 11.884883880615234, -1.9401683807373047, -4.6692352294921875, 40.76845932006836, 12.759101867675781, -11.060417175292969, -2.8785858154296875, 10.02762222290039, 34.352294921875, 23.701637268066406, 12.94708251953125, 24.692283630371094, 33.07232666015625, 9.510513305664062, 33.742637634277344, 4.58856201171875, 0.025541305541992188, 29.187103271484375, 20.224205017089844, 21.913986206054688, 21.777175903320312, 25.835830688476562, 34.705177307128906, -10.337677001953125, -0.1185302734375, 11.954887390136719, 36.65961456298828, 21.020076751708984, 42.072654724121094, 114.13113403320312, 25.24053955078125, 16.50440216064453, 14.790214538574219, 21.403640747070312, 28.427322387695312, 45.04624938964844, 25.858150482177734, 4.0194091796875, 0.709320068359375, 9.644584655761719, 23.300430297851562, 3.7340087890625, 4.477264404296875, 56.20269012451172, 7.8127288818359375, 2.1597213745117188, 68.36724853515625, 27.497215270996094, -13.53802490234375, 2.18914794921875, 6.246299743652344, 30.04180908203125, 13.414642333984375, 1.2094497680664062, 14.47671890258789, 45.74528503417969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000162.npy"}
|
|
{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 18.588184356689453, "std": 16.928220748901367, "min": -15.1075439453125, "p10": 0.4598331451416018, "median": 17.38038921356201, "p90": 39.42785491943359, "max": 62.53645324707031, "pos_frac": 0.90625, "sample": [6.927337646484375, 5.096281051635742, 53.51115417480469, 21.9515380859375, -5.794218063354492, -5.722625732421875, 10.565399169921875, 2.4973373413085938, 2.9620819091796875, 24.788551330566406, 16.699615478515625, 9.905593872070312, 23.649303436279297, 61.19044494628906, 17.561674118041992, 37.95044708251953, 26.66124725341797, 50.89898681640625, 4.292724609375, 23.72936248779297, 12.276313781738281, 24.899703979492188, 18.8223876953125, 21.010276794433594, 19.87346649169922, 3.8104076385498047, 36.953758239746094, -12.073554992675781, 9.246406555175781, 17.19910430908203, 21.275283813476562, 5.743236541748047, 21.5406494140625, 11.062171936035156, 23.47592544555664, -3.6771373748779297, -7.5429840087890625, 52.97901916503906, 39.05207061767578, 6.383522033691406, 11.95806884765625, 9.020912170410156, 23.352455139160156, 33.38726806640625, 22.096189498901367, 26.759414672851562, 7.734855651855469, 62.53645324707031, 26.212112426757812, 0.6746044158935547, 10.478008270263672, 16.728172302246094, 15.843460083007812, 39.588905334472656, 28.491676330566406, 9.142276763916016, 28.23335075378418, 13.353221893310547, -15.1075439453125, 2.7180538177490234, 45.507652282714844, 22.020233154296875, 36.913970947265625, 0.36778831481933594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000163.npy"}
|
|
{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 20.988468170166016, "std": 15.990802764892578, "min": -12.768966674804688, "p10": 2.798645782470704, "median": 19.882247924804688, "p90": 37.23454055786133, "max": 73.93821716308594, "pos_frac": 0.9375, "sample": [-5.667625427246094, 31.321044921875, 31.877662658691406, 43.42571258544922, 30.534019470214844, 24.958351135253906, 19.23267364501953, 43.607269287109375, 21.771305084228516, 29.842575073242188, 37.05982971191406, 7.477085113525391, -2.3643016815185547, 28.14226531982422, 19.914443969726562, 10.393814086914062, 23.926918029785156, 28.741134643554688, 2.181974411010742, 7.372438430786133, -12.768966674804688, 22.646156311035156, 23.76055908203125, 21.196075439453125, 20.285280227661133, 14.355850219726562, 2.4225997924804688, 10.877029418945312, 19.11853790283203, 8.87407112121582, 12.948280334472656, 5.01605224609375, 25.401702880859375, 35.083946228027344, 19.850051879882812, 31.946701049804688, 13.846214294433594, 37.276519775390625, 26.099130630493164, 36.96943664550781, 15.467079162597656, 16.983989715576172, 9.393226623535156, 23.623046875, 13.836469650268555, 10.493576049804688, 8.50701904296875, 3.67608642578125, 5.2883758544921875, 37.13658905029297, 25.35466766357422, 17.132034301757812, 59.07159423828125, 17.3348388671875, 58.018890380859375, 49.685035705566406, 1.1511383056640625, 23.94097900390625, 34.30610656738281, -4.521415710449219, 18.51987075805664, 12.183425903320312, 73.93821716308594, 3.7874298095703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000164.npy"}
|
|
{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 14.148780822753906, "std": 14.90626049041748, "min": -23.65552520751953, "p10": -1.7030897140502927, "median": 10.004853248596191, "p90": 35.17744750976563, "max": 49.629119873046875, "pos_frac": 0.84375, "sample": [13.099433898925781, 22.508386611938477, 2.1609268188476562, 7.343254089355469, 5.624359130859375, 1.9485340118408203, 17.236282348632812, 5.4837188720703125, 6.3332061767578125, -0.012603759765625, 1.8339385986328125, -23.65552520751953, 17.093338012695312, 14.30453872680664, 8.4237060546875, -4.90655517578125, -3.096698760986328, 15.027175903320312, 32.114585876464844, 34.517822265625, 25.60379409790039, 8.120513916015625, 10.41448974609375, 45.634429931640625, 8.010551452636719, 35.97505187988281, 9.628644943237305, -6.8137664794921875, 9.203079223632812, 26.956497192382812, 23.498252868652344, 9.551416397094727, 35.46014404296875, 28.80048370361328, 4.999664306640625, -3.1103134155273438, 15.100349426269531, -2.5709457397460938, 12.624153137207031, 2.7441978454589844, 5.715063095092773, -1.574014663696289, 16.566909790039062, 22.255605697631836, 45.3978271484375, 8.118396759033203, 29.85006332397461, 26.438308715820312, 15.394096374511719, 0.02845001220703125, -0.3876953125, 19.315582275390625, 11.65340805053711, -1.7584075927734375, 10.381061553955078, 32.8297119140625, 9.229949951171875, 44.77598571777344, 43.150489807128906, 49.629119873046875, 1.3335151672363281, 8.085746765136719, 5.4931182861328125, 30.38720703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000165.npy"}
|
|
{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 23.267839431762695, "std": 23.729537963867188, "min": -9.22419548034668, "p10": 1.312217330932618, "median": 17.488187789916992, "p90": 60.76835327148438, "max": 111.61956787109375, "pos_frac": 0.90625, "sample": [71.93453979492188, 47.41914367675781, -1.0319347381591797, 76.0985107421875, 0.9863433837890625, 21.842029571533203, 7.381366729736328, 13.744293212890625, 59.11027526855469, -3.03009033203125, 26.84320068359375, 8.718854904174805, 3.1556529998779297, 30.065940856933594, 2.8353118896484375, 13.671689987182617, 21.39752197265625, 15.880401611328125, 37.05353927612305, 23.65113067626953, 18.897192001342773, 111.61956787109375, 3.074207305908203, 3.4822044372558594, -9.22419548034668, -5.923311233520508, 10.938468933105469, 16.362770080566406, 8.0517578125, 42.20611572265625, 61.47895812988281, 70.91293334960938, 20.508771896362305, 43.3389778137207, 9.915534973144531, 75.36898803710938, 11.476016998291016, 19.166786193847656, 29.464187622070312, 25.356857299804688, 10.950504302978516, 19.71530532836914, 6.118385314941406, 20.429588317871094, 50.51677703857422, 16.39560317993164, 66.38595581054688, 15.049110412597656, 19.852807998657227, 32.54559326171875, 12.778266906738281, 34.304718017578125, -3.7387542724609375, 3.9210205078125, 2.072589874267578, -3.9463539123535156, 30.95294952392578, 8.183670043945312, 18.580772399902344, 7.5385589599609375, 35.981815338134766, 10.950754165649414, 20.920143127441406, 8.481437683105469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000166.npy"}
|
|
{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 24.126216888427734, "std": 19.15192985534668, "min": -29.309402465820312, "p10": 4.420072555541992, "median": 24.386606216430664, "p90": 48.97690391540528, "max": 68.26449584960938, "pos_frac": 0.921875, "sample": [40.93754577636719, 20.558795928955078, 35.98396301269531, 34.196563720703125, 40.56683349609375, 49.45211410522461, 8.041656494140625, 16.92511749267578, 35.49976348876953, 27.376468658447266, 59.350830078125, 47.47997283935547, 25.39580535888672, -0.633026123046875, 8.0479736328125, 13.187944412231445, 29.314163208007812, 5.57745361328125, 31.59581756591797, 36.215850830078125, 12.533807754516602, 11.815643310546875, 15.122589111328125, 16.5506591796875, 25.23663330078125, -29.309402465820312, 68.26449584960938, 38.99378204345703, 0.0420074462890625, 50.938140869140625, -1.142852783203125, 8.682098388671875, 44.74013900756836, 6.788475036621094, 53.39068603515625, 30.18171501159668, 47.868080139160156, 28.411630630493164, 36.73088073730469, -12.249641418457031, 23.192718505859375, 45.734046936035156, 57.84123229980469, 23.839073181152344, 63.421661376953125, 7.212009429931641, 24.616771697998047, 10.301559448242188, 31.704025268554688, 33.665809631347656, 31.202255249023438, 13.714752197265625, 25.496089935302734, 10.036216735839844, 4.686567306518555, 39.378936767578125, 24.15644073486328, 14.582155227661133, 5.4399566650390625, -4.102073669433594, 18.131061553955078, 4.30586051940918, 10.8775634765625, 5.9820709228515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000167.npy"}
|
|
{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 19.041858673095703, "std": 15.39984130859375, "min": -10.596649169921875, "p10": 0.22038402557373055, "median": 15.394889831542969, "p90": 38.374985504150395, "max": 67.79399871826172, "pos_frac": 0.921875, "sample": [11.197067260742188, 12.3328857421875, 11.118095397949219, 6.600429534912109, 0.08102607727050781, 15.094263076782227, 27.81145477294922, 8.300237655639648, 10.636219024658203, -2.9877471923828125, -8.368820190429688, 24.33294677734375, 40.45603942871094, 38.48059844970703, 0.3007049560546875, 36.155487060546875, 30.82622528076172, 38.12855529785156, 35.2130126953125, 24.338478088378906, 33.8961067199707, 43.16607666015625, 47.210968017578125, 35.48657989501953, 12.364891052246094, 12.501518249511719, -0.0222930908203125, 37.1434326171875, 22.288700103759766, 8.876174926757812, 15.565109252929688, 13.069198608398438, -5.541786193847656, 9.283012390136719, 15.22467041015625, 36.27768325805664, 13.04677963256836, 0.1859607696533203, 9.198776245117188, 24.29718780517578, 12.733779907226562, 30.23919677734375, 5.237396240234375, 17.23603057861328, 24.272872924804688, 41.26322937011719, 8.318761825561523, 1.0819625854492188, 22.690093994140625, 12.64837646484375, 32.17535400390625, 4.036806106567383, 18.413358688354492, 5.544097900390625, 27.972068786621094, 19.496864318847656, 23.03874397277832, -10.596649169921875, 24.490062713623047, 4.784614562988281, 67.79399871826172, 7.817436218261719, 40.4860954284668, 33.938575744628906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000168.npy"}
|
|
{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 17.245698928833008, "std": 21.965595245361328, "min": -21.029052734375, "p10": -4.611370849609374, "median": 12.022171020507812, "p90": 44.29730834960938, "max": 98.49119567871094, "pos_frac": 0.78125, "sample": [10.632064819335938, 24.65524673461914, -2.3065452575683594, 9.421577453613281, 17.120315551757812, 40.082664489746094, 10.321853637695312, 26.037673950195312, 16.248504638671875, 0.8570632934570312, -6.131649017333984, 26.808258056640625, -21.029052734375, -0.4720649719238281, 5.534900665283203, -3.6247100830078125, 2.2386932373046875, -2.619171142578125, 11.929641723632812, 26.69013214111328, 18.891582489013672, -0.13358306884765625, 12.01531982421875, -5.500247955322266, 45.7088508605957, 31.278976440429688, 16.25306510925293, 7.029632568359375, 22.707412719726562, 4.793107986450195, 34.68598175048828, 4.8132476806640625, 3.215595245361328, 17.374404907226562, 24.171245574951172, 25.273956298828125, 43.37347412109375, 1.1361141204833984, 22.292797088623047, 49.87847900390625, 14.718595504760742, 98.49119567871094, 2.7884445190429688, -0.0462188720703125, 29.288101196289062, 9.634700775146484, 12.029022216796875, -8.220924377441406, 69.931884765625, 8.393943786621094, 79.83767700195312, -5.373542785644531, 30.254776000976562, 7.1543121337890625, 44.6932373046875, 5.5480194091796875, 22.90125274658203, -5.0342254638671875, 16.807445526123047, 16.393497467041016, -2.9679412841796875, 26.925678253173828, 67.7008056640625, -9.779815673828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000169.npy"}
|
|
{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 21.571361541748047, "std": 18.903135299682617, "min": -24.69171142578125, "p10": 0.4831369400024424, "median": 19.555021286010742, "p90": 49.639342880249025, "max": 64.28924560546875, "pos_frac": 0.90625, "sample": [45.704322814941406, -0.7277145385742188, 15.38958740234375, 25.198272705078125, 33.1151237487793, 14.573596954345703, 29.53314208984375, 10.980819702148438, 27.88030242919922, -3.158721923828125, 38.7430419921875, -4.763759613037109, 21.780540466308594, -9.325645446777344, 4.510339736938477, 2.9411239624023438, 27.63367462158203, 64.28924560546875, 23.420875549316406, 51.26066589355469, 2.541248321533203, 1.4444713592529297, 41.62643814086914, 28.38015365600586, 0.071136474609375, 13.368614196777344, -24.69171142578125, 4.7211456298828125, 10.681488037109375, 42.16117477416992, 42.278900146484375, 48.447174072265625, 16.09967803955078, 27.65155029296875, 53.48914337158203, 3.0552291870117188, 53.05671691894531, 5.4099273681640625, 6.679225921630859, 55.3162841796875, 36.8823356628418, 15.745819091796875, 7.664867401123047, 50.150272369384766, 22.376136779785156, -0.066741943359375, 15.202787399291992, 7.243921279907227, 7.0033721923828125, 5.776887893676758, 25.535118103027344, 26.347412109375, 22.27349853515625, 17.32950210571289, 53.673728942871094, 10.030715942382812, 24.280685424804688, 40.93219757080078, 5.310070037841797, 32.47675323486328, 46.821563720703125, 14.728208541870117, 34.6478271484375, 7.433429718017578], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000170.npy"}
|
|
{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 18.75889015197754, "std": 19.87282371520996, "min": -17.434616088867188, "p10": -0.8283037185668927, "median": 15.055505752563477, "p90": 41.9427833557129, "max": 95.2591552734375, "pos_frac": 0.890625, "sample": [3.8985366821289062, 43.82563781738281, 39.69757843017578, 6.558219909667969, 6.363124847412109, -1.608551025390625, 3.0163421630859375, 12.818046569824219, -9.972412109375, 26.396987915039062, 4.3884124755859375, 22.485698699951172, 25.830230712890625, 16.425888061523438, 16.85724639892578, 20.22882080078125, 15.183334350585938, 3.5000152587890625, 14.927677154541016, -2.9083786010742188, 35.29151153564453, 24.20395278930664, 25.40753173828125, 12.878171920776367, 56.238258361816406, 24.593799591064453, 14.673301696777344, 95.2591552734375, -17.434616088867188, 20.544105529785156, 12.92388916015625, -9.128097534179688, 45.35595703125, 30.522903442382812, 19.492198944091797, 4.787384033203125, 45.6455078125, 32.223655700683594, 6.008182525634766, 82.02218627929688, 20.282745361328125, 23.32592010498047, 35.53521728515625, 7.918107986450195, -16.160171508789062, 16.00030517578125, 34.574684143066406, 25.381622314453125, 1.0402717590332031, -2.7800216674804688, 4.150016784667969, 11.190093994140625, 34.867820739746094, 20.352399826049805, 13.69390869140625, 14.908079147338867, 12.1920166015625, 37.99012756347656, 1.1392364501953125, 14.406867980957031, 4.985294342041016, 0.9922733306884766, 42.90501403808594, 12.255744934082031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000171.npy"}
|
|
{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 18.185348510742188, "std": 18.891036987304688, "min": -21.518531799316406, "p10": -4.333843994140624, "median": 16.94161891937256, "p90": 42.5125701904297, "max": 74.30046081542969, "pos_frac": 0.84375, "sample": [13.422815322875977, 24.624343872070312, -21.518531799316406, 20.718421936035156, 11.592735290527344, 2.5240402221679688, 36.51715087890625, 20.318450927734375, 15.80804443359375, 1.1821708679199219, -7.154579162597656, 12.769670486450195, -3.4474258422851562, 22.420196533203125, 26.052764892578125, 22.64690399169922, 15.593631744384766, 11.630172729492188, -6.85276985168457, 44.06202697753906, 1.7847442626953125, 30.43612289428711, 8.428390502929688, 31.05242919921875, 3.2822265625, 15.310550689697266, 33.64192199707031, 37.22713088989258, 4.766746520996094, 68.41705322265625, -2.2630538940429688, 19.522903442382812, 43.96466827392578, 13.098876953125, 35.33610534667969, 61.347747802734375, 74.30046081542969, 18.075193405151367, 22.75623321533203, 22.75902557373047, -4.713737487792969, 6.8633575439453125, 15.468399047851562, 24.603727340698242, 20.463478088378906, -3.0570831298828125, 10.101125717163086, 18.615026473999023, 5.4397125244140625, 39.50653076171875, 23.318756103515625, 2.7264862060546875, 3.10723876953125, 34.62820053100586, 23.177513122558594, -14.970367431640625, 29.87179946899414, 20.040847778320312, 50.153839111328125, 15.037017822265625, -4.795989990234375, -10.904380798339844, 43.800872802734375, 9.224258422851562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000172.npy"}
|
|
{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 18.32070541381836, "std": 17.518054962158203, "min": -13.054893493652344, "p10": -2.5041090011596676, "median": 16.15769386291504, "p90": 42.80900421142579, "max": 67.28913879394531, "pos_frac": 0.84375, "sample": [7.7194061279296875, 16.130722045898438, 18.761075973510742, 18.971473693847656, -4.26751708984375, 31.486968994140625, 5.971488952636719, 34.77268981933594, 42.22499084472656, 3.2294235229492188, 21.835205078125, -4.724822998046875, 3.399141311645508, -2.0040054321289062, 18.565963745117188, 43.142181396484375, 13.303573608398438, 23.318859100341797, 14.323568344116211, 36.436004638671875, 21.803802490234375, 6.021827697753906, 11.640602111816406, 8.933273315429688, 8.922462463378906, 16.793365478515625, 63.20866394042969, 7.872255325317383, 27.95098876953125, 21.328643798828125, 15.943061828613281, 10.791112899780273, -11.83111572265625, 30.691843032836914, 33.248443603515625, 17.311416625976562, 43.059295654296875, 46.512908935546875, -2.7184391021728516, 54.839393615722656, 35.04509353637695, 23.453842163085938, 5.724498748779297, 4.852180480957031, 8.256927490234375, 11.068721771240234, 67.28913879394531, 38.02988815307617, -9.620716094970703, 18.048919677734375, 10.133203506469727, 16.85129737854004, 14.611186981201172, -0.00079345703125, -13.054893493652344, 55.08819580078125, 12.200918197631836, -0.7836456298828125, 16.18466567993164, 17.880739212036133, 28.732940673828125, 10.544891357421875, 29.950672149658203, -2.882904052734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000173.npy"}
|
|
{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 18.424602508544922, "std": 18.221277236938477, "min": -17.52099609375, "p10": -3.158850860595703, "median": 16.053077697753906, "p90": 39.663675689697264, "max": 73.0679931640625, "pos_frac": 0.859375, "sample": [4.596397399902344, 13.849365234375, -4.508872985839844, 0.9328765869140625, 13.496368408203125, 11.664237976074219, 6.450572967529297, 7.421590805053711, 20.858963012695312, 37.37318420410156, 38.44920349121094, 2.9177398681640625, 23.890520095825195, 5.236379623413086, 26.437360763549805, 21.826210021972656, 15.123291015625, 18.217453002929688, 23.050804138183594, 49.67649841308594, 10.02349853515625, 14.679580688476562, 10.335243225097656, 1.996419906616211, 33.90362548828125, -16.3951416015625, 26.70465087890625, 40.43479919433594, 5.929100036621094, 11.837076187133789, 27.4421329498291, -3.561260223388672, 15.936111450195312, -5.037330627441406, 28.53509521484375, -0.8816986083984375, 18.91925048828125, 22.925479888916016, -11.898727416992188, 52.73512268066406, 55.2667236328125, 73.0679931640625, 5.6669921875, 16.1700439453125, -2.825094223022461, -17.52099609375, -3.301889419555664, 21.089340209960938, 17.598953247070312, 20.29584503173828, 39.610382080078125, 15.905645370483398, 14.161163330078125, 34.825904846191406, 26.489219665527344, 19.963577270507812, 39.68651580810547, 30.367027282714844, 4.2506256103515625, 2.994649887084961, 65.84170532226562, 29.426183700561523, 34.732601165771484, 13.88819694519043], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000174.npy"}
|
|
{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 23.7386531829834, "std": 19.637550354003906, "min": -6.104118347167969, "p10": 3.8449869155883794, "median": 17.19618797302246, "p90": 55.349117279052734, "max": 72.57981872558594, "pos_frac": 0.984375, "sample": [8.291868209838867, 10.86480712890625, 18.50840187072754, 3.598358154296875, 34.239356994628906, 16.402587890625, 38.96089172363281, 15.470619201660156, 6.138936996459961, 62.19548034667969, 65.05230712890625, 3.2247276306152344, 5.411712646484375, 66.16265869140625, 16.79953384399414, -6.104118347167969, 33.78439712524414, 28.365768432617188, 17.59284210205078, 48.228477478027344, 27.626319885253906, 7.192466735839844, 26.59062957763672, 72.57981872558594, 2.4419174194335938, 8.472450256347656, 57.38981628417969, 22.09851837158203, 24.202163696289062, 10.121997833251953, 3.091888427734375, 6.206432342529297, 55.26043701171875, 15.723342895507812, 13.628128051757812, 10.16366958618164, 3.164409637451172, 46.78514099121094, 4.420454025268555, 8.680862426757812, 42.596004486083984, 25.417991638183594, 30.425735473632812, 49.887542724609375, 0.9734420776367188, 61.120033264160156, 34.56294250488281, 30.500587463378906, 11.640628814697266, 47.83824157714844, 45.87286376953125, 13.624296188354492, 21.696929931640625, 14.648475646972656, 9.154142379760742, 7.698123931884766, 24.400039672851562, 11.068458557128906, 27.754135131835938, 55.387123107910156, 19.311767578125, 5.250801086425781, 4.9183197021484375, 4.4947052001953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000175.npy"}
|
|
{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 21.942890167236328, "std": 17.43705940246582, "min": -10.739959716796875, "p10": 0.44505233764648505, "median": 20.91852569580078, "p90": 45.50640487670899, "max": 64.22175598144531, "pos_frac": 0.90625, "sample": [27.73638916015625, 11.313148498535156, 12.380325317382812, 36.35987854003906, 28.140914916992188, 22.850868225097656, 2.4967575073242188, 55.446380615234375, -2.8977813720703125, 13.018417358398438, 7.4761505126953125, 62.47210693359375, 22.717966079711914, 2.4007644653320312, 48.769989013671875, 26.077651977539062, 11.37054443359375, 58.95983123779297, 23.770828247070312, 2.2878494262695312, -2.2921829223632812, 16.03083038330078, 23.203453063964844, -3.3052406311035156, 46.030235290527344, -1.2710456848144531, 15.28253173828125, 29.05603790283203, 29.774932861328125, 5.043006896972656, 23.808547973632812, 15.541458129882812, 13.102066040039062, -10.739959716796875, 21.862831115722656, 28.88941192626953, 18.869369506835938, 10.247756958007812, 16.81890106201172, 24.403545379638672, 35.598838806152344, 1.6350593566894531, 4.9805145263671875, 1.1263389587402344, 22.55499267578125, 25.411842346191406, 33.134986877441406, 19.974220275878906, 36.39442443847656, 55.357879638671875, 28.071863174438477, 11.203609466552734, 18.52147674560547, -1.9733238220214844, 39.11494064331055, 38.70635986328125, 0.15307235717773438, 19.647491455078125, 44.209007263183594, 11.741222381591797, 44.28413391113281, 64.22175598144531, 19.86626625061035, 36.90245056152344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000176.npy"}
|
|
{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 21.963430404663086, "std": 20.603771209716797, "min": -6.3747100830078125, "p10": -1.341127014160156, "median": 17.294994354248047, "p90": 47.3712158203125, "max": 85.78280639648438, "pos_frac": 0.875, "sample": [19.265960693359375, 13.2525634765625, 26.134231567382812, 16.18670654296875, 15.919815063476562, 32.41204071044922, 5.9185028076171875, 15.479080200195312, 37.56316375732422, 36.683135986328125, 47.61354064941406, 0.2812995910644531, 23.906291961669922, 40.058677673339844, 13.886186599731445, 18.646053314208984, 11.818588256835938, 82.59356689453125, 0.8457107543945312, 44.64556884765625, 12.48574447631836, 29.378299713134766, -6.3747100830078125, 46.96575927734375, 0.11892509460449219, 13.908292770385742, 66.14717102050781, -3.5936203002929688, -1.0844268798828125, 28.499000549316406, 17.470808029174805, 36.88972473144531, 56.318397521972656, 2.0173416137695312, 18.673233032226562, 6.589202880859375, 13.368690490722656, 58.387996673583984, 0.19179344177246094, -1.9685897827148438, -3.861663818359375, 25.320663452148438, 5.190893173217773, 15.707168579101562, 11.922027587890625, 27.87067413330078, 46.90630340576172, -1.6979713439941406, 8.134510040283203, -1.451141357421875, 28.86846923828125, 21.991960525512695, 36.15505599975586, -5.854579925537109, 17.81005859375, 9.141883850097656, 17.11918067932129, 20.065874099731445, 47.54498291015625, 6.6000518798828125, 85.78280639648438, 45.30438232421875, 32.958396911621094, 10.62982177734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000177.npy"}
|
|
{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 24.486705780029297, "std": 22.665319442749023, "min": -12.7283935546875, "p10": -0.9489757537841789, "median": 18.361080169677734, "p90": 56.17904701232911, "max": 89.21905517578125, "pos_frac": 0.875, "sample": [32.9039421081543, 0.10095977783203125, 41.98322296142578, -8.508956909179688, 41.67784881591797, 15.476425170898438, 28.30542755126953, 8.297168731689453, 3.6275787353515625, 39.12052917480469, 14.437957763671875, 40.383750915527344, 44.29994201660156, 19.129173278808594, 17.12371826171875, 43.60455322265625, 76.81564331054688, 17.592987060546875, 89.21905517578125, -2.6146697998046875, 68.99333190917969, 17.489286422729492, 14.795440673828125, 42.80111312866211, 28.022069931030273, 55.35795211791992, 41.2230224609375, -7.267189025878906, 19.352981567382812, 10.455131530761719, -2.5479202270507812, 21.50543212890625, 16.33753204345703, 1.7980232238769531, 12.886154174804688, 57.71000671386719, 11.8974609375, -1.3058700561523438, 11.672447204589844, 30.325897216796875, 12.999256134033203, 17.55438232421875, -12.7283935546875, 72.19596862792969, -0.11622238159179688, 39.28850555419922, 23.97079086303711, 25.42394256591797, 2.085611343383789, 56.53094482421875, 14.746683120727539, 43.128700256347656, 22.007293701171875, 2.819551467895508, 27.9921875, 8.759834289550781, 30.387725830078125, 7.56829833984375, 10.61920166015625, 48.089752197265625, 63.4481201171875, -11.259284973144531, 44.207237243652344, 2.950580596923828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000178.npy"}
|
|
{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 24.460594177246094, "std": 23.770587921142578, "min": -28.762054443359375, "p10": -3.7336040496826164, "median": 24.872196197509766, "p90": 54.65450744628907, "max": 82.0667724609375, "pos_frac": 0.828125, "sample": [24.980796813964844, 45.2735710144043, 45.27772521972656, 62.172943115234375, 17.41313362121582, -11.186370849609375, 52.13356018066406, 28.79354476928711, 0.7701568603515625, -3.0914878845214844, 24.763595581054688, 19.304765701293945, 6.288047790527344, 15.840415954589844, 4.2908935546875, 43.35986328125, 0.7562408447265625, 49.750274658203125, 26.711711883544922, 1.342132568359375, 29.119165420532227, -1.8783416748046875, 54.862762451171875, 10.164447784423828, 56.01775360107422, 37.45043182373047, 30.71307373046875, 65.58625793457031, 49.88276672363281, 30.463050842285156, 37.672569274902344, 82.0667724609375, 69.36885070800781, 11.506065368652344, 31.162755966186523, 23.096649169921875, -1.458160400390625, 20.29128646850586, 1.6286468505859375, 32.42826843261719, 38.276512145996094, -7.44526481628418, 21.095335006713867, 8.048477172851562, 52.286651611328125, 34.231109619140625, -9.630699157714844, 14.264266967773438, 34.540958404541016, -0.45766448974609375, 8.7738037109375, 36.69770812988281, 20.2838134765625, 48.1397705078125, 54.1685791015625, 48.50909423828125, -28.762054443359375, 55.48393249511719, -7.866310119628906, 4.505302429199219, -22.192642211914062, 26.020263671875, 15.425390243530273, -4.008796691894531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000179.npy"}
|
|
{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 24.595914840698242, "std": 19.27443504333496, "min": -8.049385070800781, "p10": -0.45963649749755786, "median": 21.61882495880127, "p90": 52.32546653747559, "max": 74.037841796875, "pos_frac": 0.890625, "sample": [25.461633682250977, 42.923553466796875, 22.161575317382812, -5.99053955078125, -5.486572265625, 56.16070556640625, 6.658138275146484, 14.08074951171875, 43.960960388183594, 28.247039794921875, -0.7602252960205078, 33.96430206298828, 19.50726318359375, 33.21124267578125, 23.52167510986328, 9.273193359375, 40.51255798339844, 27.796916961669922, 34.204124450683594, 14.377822875976562, 31.32811737060547, 0.24173736572265625, 12.480945587158203, 50.623069763183594, 19.56110382080078, 60.13883972167969, 27.21289825439453, 26.523391723632812, 7.719562530517578, 15.44571304321289, 45.03753662109375, 5.692998886108398, 39.59788513183594, 23.37291717529297, 49.167327880859375, -1.622976303100586, 20.26618194580078, 42.48712921142578, 6.73309326171875, 53.28926086425781, 21.076074600219727, 2.7561111450195312, 57.546836853027344, 11.371147155761719, -5.1487884521484375, 12.754981994628906, 15.837875366210938, 28.060226440429688, 42.66755676269531, 15.072196960449219, 18.673297882080078, 53.0550651550293, 6.947175979614258, 74.037841796875, 18.060134887695312, -4.019065856933594, 37.876441955566406, 19.855758666992188, 11.083351135253906, -8.049385070800781, 68.06851196289062, 33.559112548828125, 12.742515563964844, 31.170761108398438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000180.npy"}
|
|
{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 17.567216873168945, "std": 18.16219711303711, "min": -14.637115478515625, "p10": -2.7568309783935545, "median": 14.257137298583984, "p90": 44.96337890625001, "max": 74.3797607421875, "pos_frac": 0.859375, "sample": [4.174339294433594, 7.347339630126953, 12.076364517211914, 30.389148712158203, 45.79280090332031, -2.7779178619384766, -2.4420242309570312, 16.960800170898438, 14.183830261230469, 13.7373046875, 11.937076568603516, 10.882892608642578, 1.373666763305664, 0.08065032958984375, -14.637115478515625, 35.954734802246094, 17.851863861083984, 13.651657104492188, 27.26988983154297, 8.90414047241211, 23.853073120117188, -10.75564956665039, 2.56475830078125, 43.02806091308594, 3.1007041931152344, 16.102256774902344, 16.42938232421875, 74.3797607421875, 28.789390563964844, 7.039134979248047, 33.323081970214844, 11.947822570800781, -7.737335205078125, 16.314346313476562, 16.876052856445312, 8.325754165649414, 6.689992904663086, 14.256416320800781, 47.65711975097656, -6.236114501953125, 10.028985977172852, -5.180633544921875, 51.28364562988281, 15.567119598388672, 39.00682067871094, 28.416486740112305, -7.9048004150390625, 29.03179359436035, 14.257858276367188, 21.884241104125977, 49.97068786621094, 62.500213623046875, 24.020469665527344, 21.350574493408203, 19.893234252929688, -2.7076282501220703, 0.9549045562744141, 13.145050048828125, 57.088653564453125, 29.98491668701172, 17.017494201660156, 16.329177856445312, 7.2037811279296875, 12.499359130859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000181.npy"}
|
|
{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 26.983417510986328, "std": 23.57621955871582, "min": -34.21168518066406, "p10": 0.35819778442382855, "median": 24.671287536621094, "p90": 58.815073013305664, "max": 71.30987548828125, "pos_frac": 0.90625, "sample": [57.0606689453125, 28.4024658203125, 59.679771423339844, 32.978607177734375, 24.471786499023438, 24.87078857421875, 43.919921875, 42.735321044921875, 65.32948303222656, 35.96091842651367, 17.7894287109375, 19.27100372314453, 7.875997543334961, 0.8530521392822266, 55.09123992919922, -34.21168518066406, 63.931053161621094, 1.5351486206054688, 4.2689971923828125, 15.080787658691406, 60.260467529296875, 1.9369773864746094, 70.35517120361328, 28.206878662109375, 15.726303100585938, -5.031711578369141, 56.772186279296875, 9.185298919677734, 10.237979888916016, 25.162261962890625, 48.068641662597656, 57.786285400390625, 17.802696228027344, -10.749404907226562, 32.557762145996094, 45.17223358154297, 27.356674194335938, 13.521255493164062, 41.150054931640625, 44.35630798339844, 0.17905426025390625, 15.502042770385742, 11.133344650268555, 44.188358306884766, 24.306991577148438, 33.562049865722656, 17.00958251953125, 58.89748764038086, -0.5267791748046875, 0.7761993408203125, 71.30987548828125, 45.84050750732422, 22.456069946289062, 4.7701873779296875, -7.7284088134765625, 6.747077941894531, -13.680168151855469, 4.745479583740234, 52.144195556640625, 23.5931396484375, 58.622772216796875, 14.353336334228516, 35.55742645263672, 46.44984436035156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000182.npy"}
|
|
{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 20.88549041748047, "std": 18.435461044311523, "min": -2.7816314697265625, "p10": 1.0703777313232423, "median": 15.518823623657227, "p90": 50.52491302490235, "max": 70.30805969238281, "pos_frac": 0.953125, "sample": [39.68345642089844, 31.58416748046875, 9.750997543334961, 14.518251419067383, 1.3986778259277344, 54.8327751159668, 32.69456481933594, 38.572052001953125, 30.502418518066406, 6.861167907714844, 8.901138305664062, 24.244285583496094, 2.1755714416503906, -0.2162914276123047, 1.0068206787109375, 28.01152801513672, 70.30805969238281, 57.064598083496094, 32.572425842285156, 14.990888595581055, 22.26276397705078, 15.60653305053711, 54.889923095703125, 33.36687088012695, 15.431114196777344, 1.2186775207519531, 26.15139389038086, 7.133186340332031, 7.0652923583984375, 0.4468841552734375, 31.09032440185547, 20.514249801635742, 1.7441120147705078, 0.9214382171630859, 32.597190856933594, 1.9866943359375, 20.732120513916016, 36.40174865722656, 50.26177215576172, 58.19231414794922, 5.2617340087890625, 11.469402313232422, 61.77581787109375, 0.91290283203125, 5.986717224121094, -2.7816314697265625, 32.25328826904297, -1.7386474609375, 7.814666748046875, 2.627899169921875, 50.63768768310547, 19.77227783203125, 35.201812744140625, 31.98503875732422, 10.162113189697266, 13.427215576171875, 4.091102600097656, 39.27661895751953, 10.521026611328125, 21.65252685546875, 1.9625320434570312, 17.49361801147461, 6.573760986328125, 12.859687805175781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000183.npy"}
|
|
{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 15.659937858581543, "std": 17.907655715942383, "min": -10.619544982910156, "p10": -1.9542850494384763, "median": 10.777595520019531, "p90": 35.1061264038086, "max": 84.0571517944336, "pos_frac": 0.84375, "sample": [6.938751220703125, -1.6135482788085938, 14.960426330566406, 25.663650512695312, 21.940994262695312, 17.928543090820312, -0.8386917114257812, 6.24786376953125, 25.20079803466797, -2.1003150939941406, 84.0571517944336, 47.350982666015625, 0.8329944610595703, 20.7479248046875, 13.010673522949219, 10.44158935546875, 14.437652587890625, 21.082176208496094, 22.189189910888672, 50.1439208984375, 1.4724006652832031, 31.2625732421875, -1.521728515625, 2.7786788940429688, 5.414054870605469, 26.333755493164062, 9.771066665649414, 4.210868835449219, 25.708282470703125, 7.008951187133789, 25.740264892578125, 2.9896240234375, 38.3704833984375, 35.74971008300781, -7.892917633056641, 45.79204559326172, 5.635993957519531, -10.619544982910156, 7.887119293212891, 20.09109878540039, 33.60443115234375, 0.3410186767578125, 6.014720916748047, 11.13212776184082, 22.799209594726562, 31.687911987304688, 4.023942947387695, 8.292560577392578, 7.3383331298828125, -4.859230041503906, -4.417804718017578, 24.414751052856445, -2.8533554077148438, 15.522968292236328, 4.196891784667969, 8.90704345703125, 10.538871765136719, 30.073272705078125, 8.825199127197266, 11.016319274902344, 12.545501708984375, -5.5715789794921875, 18.16794204711914, 75.689453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000184.npy"}
|
|
{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 15.436178207397461, "std": 19.218881607055664, "min": -24.6705322265625, "p10": -2.5286979675292964, "median": 11.274328231811523, "p90": 44.805409622192386, "max": 81.427001953125, "pos_frac": 0.75, "sample": [-24.6705322265625, -2.751720428466797, 0.5404758453369141, 31.761871337890625, 20.956737518310547, 7.439027786254883, -3.1864013671875, 14.20492172241211, 32.74761962890625, 16.450122833251953, 7.415424346923828, 31.99897003173828, 21.63542938232422, 5.17304801940918, 26.300216674804688, 24.240203857421875, -1.6454544067382812, 15.563629150390625, 7.8212738037109375, 3.98663330078125, -0.7111663818359375, 22.13062858581543, 0.6834564208984375, 24.966327667236328, 7.714344024658203, 20.974037170410156, -1.0926055908203125, 24.264205932617188, 5.401203155517578, 18.414566040039062, 33.601806640625, -2.9028282165527344, 44.29693603515625, 45.64978790283203, -7.620025634765625, 29.108306884765625, -1.6529693603515625, 35.30109405517578, 49.498802185058594, 4.201068878173828, -1.3081932067871094, 14.221899032592773, -0.8075141906738281, -5.5158843994140625, -0.7381229400634766, 4.990930557250977, 25.323646545410156, 53.72130584716797, 45.0233268737793, 11.339836120605469, 81.427001953125, 5.135768890380859, 22.30316925048828, 8.938091278076172, -0.32696533203125, 51.4374885559082, 11.208820343017578, 25.94947052001953, -22.399139404296875, 47.140380859375, 1.7903060913085938, 6.586875915527344, 16.272720336914062, -2.008312225341797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000185.npy"}
|
|
{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 25.340028762817383, "std": 18.850744247436523, "min": -9.084362030029297, "p10": 4.139540100097657, "median": 22.58097743988037, "p90": 54.50668716430665, "max": 76.47590637207031, "pos_frac": 0.953125, "sample": [4.29241943359375, 22.38452911376953, 56.68763732910156, 34.19988250732422, 27.16425323486328, 11.549423217773438, 39.59001159667969, -0.5074577331542969, 62.44767761230469, 42.29914093017578, 17.676025390625, 43.297027587890625, 4.0740203857421875, 11.7213134765625, 25.703121185302734, 5.687145233154297, 22.77742576599121, 5.317268371582031, 13.637947082519531, 8.01446533203125, 33.2376708984375, 9.952442169189453, 17.56133270263672, 36.295570373535156, 15.016250610351562, 26.865917205810547, 33.73855972290039, 55.297691345214844, 52.6610107421875, 14.845840454101562, 50.15907287597656, 13.505277633666992, 30.645164489746094, 11.544458389282227, 12.190238952636719, 30.407611846923828, 63.10394287109375, 65.03268432617188, 37.01791763305664, 2.2515830993652344, 29.9627685546875, 0.6248798370361328, 35.89289093017578, 16.458999633789062, 18.690956115722656, 76.47590637207031, 0.8238677978515625, 36.369110107421875, -2.6929168701171875, 7.454048156738281, 46.00091552734375, 57.24329376220703, 23.782875061035156, 36.26726150512695, 17.979843139648438, 38.12207794189453, 11.606376647949219, -9.084362030029297, 10.041679382324219, 27.00971221923828, 21.045578002929688, 24.586044311523438, 12.032737731933594, 15.7237548828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000186.npy"}
|
|
{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 18.950557708740234, "std": 20.59282684326172, "min": -27.73986053466797, "p10": -1.6332426071166992, "median": 17.493057250976562, "p90": 49.44420089721681, "max": 68.41879272460938, "pos_frac": 0.828125, "sample": [-27.73986053466797, 67.32589721679688, -0.1396160125732422, 14.783985137939453, -15.038990020751953, 20.065383911132812, 34.39283752441406, 30.04083251953125, 15.59249496459961, 23.55426788330078, 16.430255889892578, 11.924028396606445, 46.54710388183594, 12.828498840332031, 17.313034057617188, 26.375640869140625, 44.859745025634766, 4.3229827880859375, 0.42734527587890625, -1.4712104797363281, 21.511611938476562, -1.6347503662109375, 68.41879272460938, 26.003089904785156, 20.804397583007812, 22.41855239868164, 65.95095825195312, -3.1816673278808594, 35.79697799682617, 3.104337692260742, -2.8512115478515625, 31.538795471191406, 2.6294403076171875, -1.6297245025634766, 1.0314979553222656, 50.685813903808594, 11.474906921386719, 21.821569442749023, -1.5290069580078125, 29.51360321044922, 41.69834899902344, 58.47705078125, 17.673080444335938, 4.842399597167969, 30.49615478515625, 8.116914749145508, 13.288829803466797, 5.141632080078125, -21.45349884033203, 24.287986755371094, 3.2402267456054688, 7.6038818359375, 21.989501953125, 17.767478942871094, 30.372133255004883, 13.177627563476562, 31.364837646484375, 2.31634521484375, 17.847122192382812, 60.38473892211914, 51.34759521484375, 19.817598342895508, 12.058137893676758, -3.2930450439453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000187.npy"}
|
|
{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 21.670263290405273, "std": 20.467336654663086, "min": -29.131370544433594, "p10": -1.0429559707641598, "median": 16.743595123291016, "p90": 52.89972839355471, "max": 64.52778625488281, "pos_frac": 0.859375, "sample": [36.25178146362305, 17.04737091064453, 55.416656494140625, 1.6664924621582031, 40.46782684326172, 7.9229736328125, 3.0340042114257812, -7.328971862792969, 35.332000732421875, 42.030357360839844, 12.999122619628906, -1.3772964477539062, 27.215728759765625, 25.378738403320312, 47.54998779296875, 34.81001281738281, 16.393592834472656, 13.385566711425781, 14.660400390625, -0.5819988250732422, 5.0603485107421875, 2.7235183715820312, 23.945144653320312, 16.39318084716797, 5.551780700683594, 19.91503143310547, 11.690635681152344, 9.102275848388672, 64.09249114990234, 6.499786376953125, 62.348365783691406, -29.131370544433594, 42.921539306640625, 32.136146545410156, 64.52778625488281, 18.032424926757812, 12.12554931640625, 55.192474365234375, 3.5313758850097656, 46.12004852294922, 31.129806518554688, 13.024608612060547, -8.266983032226562, 16.077646255493164, 41.735923767089844, 12.675262451171875, 44.63443374633789, -3.7311058044433594, 25.233295440673828, 58.19097137451172, -1.240509033203125, 31.183231353759766, 19.355228424072266, -4.044225692749023, 33.11729049682617, 16.4398193359375, 23.22701072692871, 61.55162048339844, 39.16425704956055, 10.105997085571289, 20.200042724609375, -0.07828330993652344, 10.343132019042969, 1.8155059814453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000188.npy"}
|
|
{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 22.340126037597656, "std": 19.784210205078125, "min": -26.667800903320312, "p10": 2.3460739135742195, "median": 18.112021446228027, "p90": 52.62342262268067, "max": 70.375, "pos_frac": 0.921875, "sample": [10.801513671875, 44.125492095947266, -0.165618896484375, 61.883819580078125, 9.634040832519531, 2.024667739868164, 14.143159866333008, -2.9840240478515625, 3.0960216522216797, 31.892555236816406, 32.63309860229492, 60.847389221191406, 38.617340087890625, 10.462310791015625, 48.31847381591797, 20.029876708984375, 18.23017120361328, 7.566045761108398, 17.145065307617188, 10.791519165039062, -1.6565570831298828, 34.871604919433594, 19.807098388671875, 42.93312072753906, 25.334144592285156, 9.904052734375, 3.598785400390625, 18.440166473388672, 8.720233917236328, 45.66157531738281, 13.893913269042969, 26.358007431030273, 16.538536071777344, 54.23835754394531, 4.47552490234375, 17.993871688842773, 1.3108654022216797, 69.10540771484375, 24.85298728942871, 17.07331085205078, 3.915496826171875, 27.30536651611328, 20.620445251464844, 13.535224914550781, 7.189943313598633, 58.579078674316406, 44.758056640625, 70.375, 14.852470397949219, 53.16451644897461, 51.360870361328125, 4.357423782348633, 14.795921325683594, 35.904388427734375, 7.766851425170898, 10.967994689941406, 30.930023193359375, 20.425716400146484, 22.152393341064453, -26.667800903320312, 4.105255126953125, 27.2530517578125, -2.3018646240234375, 21.874267578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000189.npy"}
|
|
{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 23.570049285888672, "std": 21.541614532470703, "min": -9.24749755859375, "p10": 0.8835657119750978, "median": 19.14565658569336, "p90": 53.372885131835936, "max": 102.48904418945312, "pos_frac": 0.921875, "sample": [0.8365631103515625, 35.126625061035156, 14.517959594726562, 6.2117156982421875, 22.260818481445312, 8.925315856933594, 48.167449951171875, 19.38349151611328, 2.6263809204101562, 53.66143798828125, 28.938720703125, 6.7012939453125, 15.255653381347656, 11.961273193359375, 68.85089111328125, 21.57086181640625, 16.622726440429688, 14.05804443359375, 41.447532653808594, 19.804824829101562, -0.6981372833251953, 48.739715576171875, -3.813323974609375, 31.64897918701172, 30.0045166015625, 20.402015686035156, 9.062164306640625, 53.21424865722656, 60.54124450683594, 33.326904296875, 15.277786254882812, 39.86357116699219, 1.259979248046875, 3.032194137573242, 19.841590881347656, 31.191368103027344, 23.50170135498047, 7.6292877197265625, -8.42279052734375, 14.256515502929688, 53.44087219238281, 0.5574951171875, 22.504737854003906, 18.61090087890625, 1.4901885986328125, 3.2472076416015625, 31.066638946533203, 51.206451416015625, -2.6351318359375, 27.954933166503906, -9.24749755859375, 66.70027160644531, 12.408981323242188, 13.250104904174805, 24.380844116210938, 28.319305419921875, 11.521141052246094, 102.48904418945312, 0.9932384490966797, 52.3216552734375, 18.907821655273438, 58.45356750488281, 18.291805267333984, 15.459503173828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000190.npy"}
|
|
{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 27.898746490478516, "std": 24.585590362548828, "min": -12.43438720703125, "p10": 1.048148345947266, "median": 23.95552635192871, "p90": 58.53111572265625, "max": 112.023193359375, "pos_frac": 0.921875, "sample": [57.9144287109375, 31.216140747070312, 64.53398132324219, 6.928443908691406, 49.0718994140625, 29.679550170898438, 33.52764892578125, 42.14872741699219, -1.1118812561035156, 47.111907958984375, 34.025909423828125, 35.71228790283203, 24.06378173828125, 37.81303405761719, 48.54473876953125, 12.051582336425781, 4.321434020996094, 5.330810546875, 35.05253601074219, 42.857276916503906, 20.793380737304688, 17.56182861328125, 18.31317138671875, 70.1051025390625, 27.067480087280273, 17.49591064453125, -1.3336410522460938, 90.13412475585938, 10.668792724609375, 13.896621704101562, 31.25762939453125, -12.269710540771484, 9.943649291992188, 4.469455718994141, 14.886627197265625, 25.17498779296875, 30.075592041015625, 23.725379943847656, 86.42060852050781, 0.8797531127929688, -0.9659976959228516, 31.79644775390625, 35.767127990722656, -12.43438720703125, 7.7528533935546875, 112.023193359375, 17.481252670288086, 7.697601318359375, 14.093185424804688, 58.79541015625, 18.539474487304688, 42.20384216308594, 23.847270965576172, 0.8084030151367188, 38.92437744140625, 22.52556610107422, 13.361030578613281, 25.360004425048828, 1.441070556640625, 13.913002014160156, 87.19013977050781, 19.08441162109375, 36.21559143066406, 30.03797149658203], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000191.npy"}
|
|
{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 23.677879333496094, "std": 22.121126174926758, "min": -8.428630828857422, "p10": -1.9902786254882812, "median": 22.301986694335938, "p90": 52.34594879150391, "max": 94.81887817382812, "pos_frac": 0.828125, "sample": [14.004096984863281, -1.1980667114257812, 29.795629501342773, 31.890403747558594, 51.310401916503906, 26.905357360839844, 0.02851104736328125, -5.627481460571289, 10.624282836914062, -2.68585205078125, 5.9726409912109375, 22.45348358154297, -3.7156600952148438, 27.740676879882812, 18.886886596679688, -1.8948211669921875, -6.794647216796875, 16.107332229614258, -1.588287353515625, 19.63885498046875, -1.7715377807617188, 65.57069396972656, -8.428630828857422, 27.242576599121094, 6.713653564453125, 66.67692565917969, 50.11827087402344, 94.81887817382812, 5.422374725341797, 67.87518310546875, 18.160995483398438, 23.584976196289062, 51.37969970703125, 39.56972122192383, 31.119644165039062, 8.266481399536133, 44.11170959472656, 52.76005554199219, 30.25389862060547, 3.457050323486328, 34.90791320800781, 71.38328552246094, 12.681306838989258, 1.4534835815429688, 31.145862579345703, 37.818199157714844, 22.150489807128906, 17.813682556152344, 16.60205078125, 22.49596405029297, 25.147048950195312, -2.03118896484375, 28.488998413085938, 8.326202392578125, 8.441192626953125, 45.885337829589844, 23.751869201660156, 18.28168487548828, -6.629241943359375, 44.09382629394531, 30.221031188964844, 25.160104751586914, 15.771835327148438, 53.267059326171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000192.npy"}
|
|
{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 16.950923919677734, "std": 18.348241806030273, "min": -45.65321350097656, "p10": -1.4036169052124023, "median": 17.462520599365234, "p90": 39.14150085449219, "max": 59.43144226074219, "pos_frac": 0.875, "sample": [15.327682495117188, -1.4447498321533203, 19.0802001953125, 26.32299041748047, -45.65321350097656, 21.69405174255371, 36.463104248046875, 25.170536041259766, -32.21083068847656, 59.43144226074219, 36.14467239379883, -5.931684494018555, 20.110816955566406, 15.98958969116211, 35.777503967285156, 2.1540184020996094, 7.749029159545898, 20.71539306640625, 0.33016204833984375, 21.129486083984375, 1.6487293243408203, 10.10833740234375, 17.357364654541016, 16.563316345214844, 7.062726974487305, 21.17818832397461, 4.849742889404297, 16.419845581054688, 4.883140563964844, 12.575820922851562, -7.118595123291016, 57.46485900878906, 22.632568359375, 8.585289001464844, 27.280349731445312, -5.1087799072265625, 19.340232849121094, 2.5430526733398438, 3.4599552154541016, 25.23576545715332, 4.043418884277344, 26.18932342529297, 16.06975555419922, 18.208412170410156, 20.655609130859375, -13.624635696411133, 48.707305908203125, 45.624855041503906, 18.99997901916504, 14.919916152954102, -1.3076400756835938, 37.149391174316406, 17.567676544189453, 6.772880554199219, 34.75044250488281, 31.17071533203125, 41.571746826171875, 39.995262145996094, 10.206043243408203, 15.090827941894531, 44.433616638183594, 11.547950744628906, 28.856414794921875, 21.947731018066406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000193.npy"}
|
|
{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 22.667644500732422, "std": 19.858245849609375, "min": -5.2749786376953125, "p10": 0.7322265625000002, "median": 19.48180103302002, "p90": 51.221525573730474, "max": 81.15718078613281, "pos_frac": 0.90625, "sample": [33.71698760986328, -3.3110885620117188, 14.032630920410156, 25.891510009765625, 5.4414825439453125, 52.027313232421875, -2.1826515197753906, 49.34135437011719, 6.7600555419921875, -1.4862442016601562, 13.06496810913086, 56.5831298828125, -5.2749786376953125, 24.091049194335938, 21.73389434814453, 6.963539123535156, 20.157249450683594, 6.46636962890625, 20.82583999633789, 35.50543212890625, 1.1125259399414062, 15.972806930541992, 4.181785583496094, -0.1754150390625, 42.020530700683594, 8.244071960449219, 3.061429977416992, 81.15718078613281, 33.090850830078125, 40.19343566894531, 62.912567138671875, 45.030853271484375, 44.69084548950195, 21.14678192138672, 9.641578674316406, 60.43396759033203, 23.975439071655273, 5.321466445922852, 28.933349609375, 3.6508750915527344, 54.8017578125, 6.866851806640625, 0.6400432586669922, 8.515602111816406, 29.364765167236328, 41.87590789794922, 8.59042739868164, 2.120697021484375, 18.806352615356445, 9.379440307617188, 31.154922485351562, 8.449859619140625, 15.99386215209961, 53.532501220703125, 0.9473209381103516, 46.65760040283203, 25.84619903564453, 43.55067443847656, 36.83403015136719, 11.964433670043945, 42.031883239746094, 14.548774719238281, 23.533653259277344, -0.223175048828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000194.npy"}
|
|
{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 22.482343673706055, "std": 18.670944213867188, "min": -13.693412780761719, "p10": 0.9351194381713877, "median": 21.258570671081543, "p90": 44.19702987670899, "max": 95.12062072753906, "pos_frac": 0.921875, "sample": [34.55511474609375, 20.365554809570312, 41.273773193359375, 35.67079162597656, 15.574165344238281, 47.51354217529297, 9.352485656738281, 29.511276245117188, 32.08270263671875, 0.15563392639160156, 0.5516891479492188, 14.884536743164062, 14.993110656738281, -12.385101318359375, 9.44024658203125, 23.789310455322266, 28.112422943115234, 23.559173583984375, -13.693412780761719, 95.12062072753906, 17.595279693603516, 21.94972801208496, 7.372936248779297, 4.092742919921875, 53.21023941040039, 21.079940795898438, 33.88702392578125, 44.82575988769531, 23.024127960205078, 52.78059387207031, 29.691261291503906, -2.485626220703125, 39.615936279296875, 29.887840270996094, 20.1328125, 30.883560180664062, 1.8297901153564453, 15.727363586425781, 24.300003051757812, 29.736099243164062, -7.471626281738281, 39.857177734375, 9.514423370361328, 13.660636901855469, 73.01622009277344, 43.13648223876953, 13.618160247802734, 15.16131591796875, 13.908233642578125, 25.301115036010742, 18.156890869140625, 14.918357849121094, 44.65155029296875, 26.13623046875, 5.128501892089844, -0.893646240234375, 22.228864669799805, 17.36089324951172, 25.6242733001709, 21.43720054626465, 2.436077117919922, 17.708709716796875, 28.586181640625, 6.122734069824219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000195.npy"}
|
|
{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 26.918298721313477, "std": 22.219341278076172, "min": -14.008438110351562, "p10": 3.423771667480469, "median": 21.631463050842285, "p90": 58.92893676757813, "max": 82.51739501953125, "pos_frac": 0.953125, "sample": [58.72135925292969, 63.74715805053711, 40.297760009765625, 44.051361083984375, 5.1635894775390625, 46.28678894042969, 46.47908020019531, 43.96343994140625, 50.36277770996094, 26.70856475830078, 8.324943542480469, 25.87050437927246, 36.29210662841797, 55.231834411621094, 19.922927856445312, 8.119718551635742, 10.463323593139648, 12.695236206054688, 4.456108093261719, 46.37708282470703, 73.79643249511719, 1.202688217163086, 12.531356811523438, 12.749465942382812, 15.147624969482422, 16.634201049804688, 33.73887634277344, 45.725486755371094, 15.460853576660156, 5.031742095947266, 59.01789855957031, 10.093429565429688, 72.24832153320312, 56.32044982910156, -6.853271484375, 23.305831909179688, 32.75664138793945, 19.137977600097656, 3.3603668212890625, 17.340599060058594, 20.60748863220215, 28.035160064697266, 82.51739501953125, 0.6563587188720703, 25.210487365722656, 10.94461441040039, 10.532997131347656, 17.095855712890625, 7.128292083740234, 3.57171630859375, -14.008438110351562, 15.673606872558594, 31.83111572265625, 19.883220672607422, 68.37739562988281, -4.817718505859375, 2.051727294921875, 22.655437469482422, 24.61890411376953, 41.44171142578125, 66.4920883178711, 37.47478485107422, 28.518829345703125, 3.995424270629883], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000196.npy"}
|
|
{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 21.327590942382812, "std": 18.700523376464844, "min": -12.61541748046875, "p10": 1.5567043304443362, "median": 17.1305513381958, "p90": 45.08239860534669, "max": 72.75408172607422, "pos_frac": 0.921875, "sample": [-0.06728744506835938, 65.13185119628906, 32.67771911621094, 7.057502746582031, 33.68458557128906, -0.5529251098632812, 12.223106384277344, 5.32318115234375, 16.8144474029541, 64.00076293945312, 72.75408172607422, 34.277618408203125, 17.75033187866211, 17.837371826171875, 5.7279815673828125, 14.693340301513672, 6.172054290771484, 56.19330596923828, 33.53343200683594, 11.7686767578125, 31.577392578125, 37.06073760986328, 32.46651840209961, 9.47802734375, 14.80488395690918, 5.543205261230469, 3.7342376708984375, 24.169944763183594, 58.261192321777344, 37.3746337890625, 63.94685363769531, 13.292755126953125, 1.7228202819824219, 31.2642822265625, -1.501312255859375, 30.891525268554688, 7.8624420166015625, 5.9229278564453125, -2.2557830810546875, -12.61541748046875, 6.519983291625977, 24.64037322998047, 0.7426624298095703, 14.005203247070312, 14.221824645996094, 14.341079711914062, 40.00025939941406, 17.9898681640625, 24.318939208984375, 46.38277816772461, 1.4855117797851562, 33.53593826293945, 16.694488525390625, 7.789863586425781, 2.1962051391601562, 23.09026336669922, 17.4466552734375, 23.3041934967041, 42.048179626464844, 1.9155654907226562, 28.599609375, 23.394386291503906, 8.035400390625, 32.26344299316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000197.npy"}
|
|
{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 23.0856990814209, "std": 23.145610809326172, "min": -13.51214599609375, "p10": 1.9882308959960944, "median": 17.17600154876709, "p90": 46.567557907104494, "max": 120.1640625, "pos_frac": 0.90625, "sample": [-3.1290969848632812, -2.6984100341796875, 18.918527603149414, 74.05032348632812, 17.98473358154297, 32.01477813720703, 44.282440185546875, 12.27499008178711, 36.05767822265625, 29.979001998901367, 45.66180419921875, 25.993560791015625, -13.51214599609375, 19.309005737304688, 11.268913269042969, 17.010478973388672, 40.49408721923828, 40.05970764160156, 12.812301635742188, 21.925352096557617, 5.986789703369141, 4.605201721191406, 10.722909927368164, 14.106460571289062, 4.91187858581543, 13.12451171875, 11.30172348022461, 26.725635528564453, 13.388067245483398, 46.95573806762695, 23.01214599609375, 16.73487091064453, 14.133743286132812, 1.7079925537109375, 37.58038330078125, 4.330310821533203, 2.642120361328125, 15.392311096191406, 8.640289306640625, 57.33587646484375, -1.0650634765625, 86.68513488769531, 27.32823944091797, 13.505706787109375, 5.764045715332031, 21.044769287109375, 56.893043518066406, 9.564924240112305, 3.8649978637695312, 17.494657516479492, -2.3496971130371094, 21.7896728515625, 44.77707290649414, 45.08735656738281, 17.341524124145508, 36.824405670166016, 21.850860595703125, 14.660537719726562, -4.448141098022461, 70.96908569335938, 8.887413024902344, 120.1640625, 8.751859664916992, 18.00124740600586], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000198.npy"}
|
|
{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 22.919063568115234, "std": 22.386255264282227, "min": -17.710159301757812, "p10": -0.4883747100830065, "median": 19.708515167236328, "p90": 55.64830169677735, "max": 95.28842163085938, "pos_frac": 0.890625, "sample": [19.50598907470703, 34.80284118652344, -12.758697509765625, 4.929779052734375, 53.68806457519531, 35.97700500488281, -5.936767578125, 39.47932434082031, -17.710159301757812, 44.89824676513672, 29.012985229492188, 10.611053466796875, 10.11962890625, 25.19634246826172, 65.63390350341797, 43.74987030029297, 5.95135498046875, 26.438987731933594, 62.32258605957031, 13.770833969116211, 3.07574462890625, 32.1140251159668, 44.06341552734375, 45.417205810546875, -9.152957916259766, 4.57403564453125, 13.689102172851562, 17.100692749023438, 40.7296142578125, -2.976104736328125, 2.9779205322265625, 56.4884033203125, 67.44345092773438, 12.522178649902344, 23.19849395751953, 57.41785430908203, 2.4239730834960938, 20.676204681396484, 3.2419357299804688, 45.86967849731445, 3.6879043579101562, 19.911041259765625, 27.18816375732422, 12.6783447265625, 58.821311950683594, 12.992141723632812, 21.121410369873047, 6.209939956665039, 2.6471900939941406, -6.587486267089844, 21.81256866455078, 12.440401077270508, 9.897232055664062, 24.15546417236328, 46.69117736816406, 28.693389892578125, 4.6307830810546875, 18.426902770996094, -1.0395622253417969, 12.435111999511719, 39.180511474609375, 24.161895751953125, 95.28842163085938, 0.7977294921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000199.npy"}
|
|
{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 24.642288208007812, "std": 25.758798599243164, "min": -45.71478271484375, "p10": -0.7799459457397457, "median": 18.634178161621094, "p90": 57.59874420166017, "max": 89.00363159179688, "pos_frac": 0.875, "sample": [55.38599395751953, 36.74909210205078, 21.207584381103516, 66.4996337890625, 8.997982025146484, 19.998863220214844, 5.001655578613281, 19.191530227661133, 81.5499267578125, 2.3145408630371094, 66.8203353881836, 52.91969299316406, 17.472503662109375, -5.951164245605469, 6.335824966430664, 58.54706573486328, 38.28456115722656, 65.67942810058594, 7.259033203125, 46.861114501953125, 5.228006362915039, 18.076826095581055, 49.93682098388672, 50.0208740234375, 3.2378463745117188, 16.630765914916992, 23.112030029296875, 4.8200225830078125, 88.68522644042969, 6.687232971191406, 23.191043853759766, -1.0814590454101562, 10.777603149414062, 1.27642822265625, 17.602455139160156, 19.234697341918945, -45.71478271484375, 28.530899047851562, 37.409889221191406, 89.00363159179688, 4.563632965087891, 33.47871398925781, 14.81207275390625, 17.829254150390625, -0.9134407043457031, -10.368057250976562, 21.777692794799805, 7.697216033935547, 44.176300048828125, -0.4684581756591797, 3.402618408203125, 51.73701477050781, 12.028732299804688, 15.301963806152344, 10.387191772460938, 51.09501647949219, 39.14970016479492, 29.17249298095703, 5.398536682128906, 23.880409240722656, -4.934467315673828, 53.851600646972656, -5.506805419921875, 41.766212463378906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000200.npy"}
|
|
{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 25.56842803955078, "std": 23.333568572998047, "min": -19.69439697265625, "p10": -0.8998674392700188, "median": 22.801776885986328, "p90": 55.750619506835946, "max": 98.30783081054688, "pos_frac": 0.875, "sample": [58.60807800292969, 15.773675918579102, 48.94829559326172, -19.69439697265625, 11.195098876953125, 30.143173217773438, 10.33213996887207, 6.158271789550781, 3.999542236328125, 11.271026611328125, -1.73876953125, 25.263412475585938, 7.9031982421875, 24.429367065429688, 56.781494140625, 37.87615203857422, 33.76947021484375, 19.9295711517334, 8.919692993164062, 49.69775390625, 73.30030822753906, 66.9996337890625, 14.330192565917969, 0.6930694580078125, 24.185211181640625, 58.464134216308594, -10.439746856689453, 30.129257202148438, 28.908790588378906, 10.621011734008789, 36.58024978637695, 27.980331420898438, 40.7281494140625, 47.0592041015625, -9.68218994140625, -1.2422981262207031, 43.975677490234375, 9.285324096679688, 13.738044738769531, 83.78559875488281, 32.9034423828125, 9.985427856445312, 15.036911010742188, 17.78729248046875, 5.065614700317383, 41.00286865234375, 6.209175109863281, 15.234870910644531, 53.345245361328125, 19.87757110595703, 27.31500244140625, -4.7718048095703125, 18.864200592041016, 41.64557647705078, 0.3811187744140625, 42.49267578125, 98.30783081054688, 40.68260955810547, -0.10086250305175781, -3.053070068359375, 45.18634033203125, 23.46300506591797, 22.140548706054688, 38.41155242919922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000201.npy"}
|
|
{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 18.85692596435547, "std": 21.07270622253418, "min": -26.96721649169922, "p10": -3.544053268432616, "median": 16.259931564331055, "p90": 48.39544296264649, "max": 74.73672485351562, "pos_frac": 0.84375, "sample": [64.39994812011719, 7.04185676574707, 19.912689208984375, 42.52711486816406, 0.8142814636230469, 19.11175537109375, 39.10649871826172, 3.909463882446289, 35.32541275024414, 22.531347274780273, 7.765678405761719, -15.140884399414062, 20.41143035888672, 46.075416564941406, 26.71605682373047, 22.326465606689453, -22.90404510498047, 3.806060791015625, 19.17898941040039, 49.389739990234375, 15.557777404785156, 49.68357849121094, -5.508525848388672, 28.82086181640625, 5.720558166503906, 6.312522888183594, 74.73672485351562, 3.8804378509521484, 13.139829635620117, 51.38105010986328, 14.404251098632812, 35.900020599365234, 44.897483825683594, 31.407379150390625, 0.1091461181640625, 1.9028587341308594, 20.712112426757812, 27.314971923828125, 41.28877258300781, 51.0054931640625, 37.45759201049805, 0.5976028442382812, 0.6893138885498047, 42.561805725097656, 8.901405334472656, 44.39517593383789, 26.16988182067871, -6.989967346191406, 7.074909210205078, 9.2225341796875, 11.535274505615234, 6.850551605224609, -26.96721649169922, -1.7305450439453125, -0.1087188720703125, 16.962085723876953, 14.727251052856445, -4.0332183837890625, -2.402667999267578, 27.370031356811523, 53.56047821044922, -6.071514129638672, 21.30487060546875, 0.7937202453613281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000202.npy"}
|
|
{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 21.060503005981445, "std": 18.404932022094727, "min": -8.97296142578125, "p10": -2.929747390747069, "median": 16.981712341308594, "p90": 46.352630615234375, "max": 86.2940673828125, "pos_frac": 0.875, "sample": [-1.3587074279785156, 41.610904693603516, 49.390193939208984, 16.637718200683594, -3.6030502319335938, 45.66241455078125, 13.062458038330078, 12.062713623046875, 8.995744705200195, 33.6761474609375, 11.706245422363281, 42.66712951660156, 17.16368865966797, 16.79973602294922, -7.914882659912109, 28.888267517089844, 14.231216430664062, 6.206260681152344, 37.7144775390625, 13.173370361328125, 54.40605926513672, 54.595062255859375, 6.657909393310547, 11.564319610595703, 27.10613250732422, 17.54402732849121, 40.520606994628906, 10.6190185546875, -3.857818603515625, 14.278404235839844, -5.77435302734375, 26.44933319091797, 14.088104248046875, 46.71331024169922, 18.085006713867188, 30.695491790771484, 40.23229217529297, 22.893878936767578, 6.318634033203125, 4.6547698974609375, 13.392509460449219, 8.572364807128906, 19.200016021728516, 50.295013427734375, 3.921630859375, -3.6721649169921875, -4.344779968261719, 13.378059387207031, 1.0346660614013672, 9.375129699707031, 42.244041442871094, 19.913166046142578, 29.725513458251953, 18.037378311157227, 33.68553161621094, -8.97296142578125, 28.553619384765625, 46.6484375, 27.432037353515625, 86.2940673828125, 13.238550186157227, 30.337921142578125, 21.105573654174805, 13.914710998535156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000203.npy"}
|
|
{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 21.9324893951416, "std": 20.08610725402832, "min": -12.054153442382812, "p10": -0.22125968933105436, "median": 18.981603622436523, "p90": 47.944997787475586, "max": 81.93719482421875, "pos_frac": 0.890625, "sample": [19.098953247070312, 29.103748321533203, 5.135318756103516, 17.788896560668945, 36.587646484375, 18.533720016479492, 41.81060791015625, 1.15106201171875, 22.58075714111328, 13.76539421081543, -0.7730140686035156, 20.230222702026367, 4.488685607910156, 26.13836097717285, 24.32628631591797, 25.848865509033203, 23.458160400390625, 28.871482849121094, 16.329620361328125, 16.60837745666504, 61.77644348144531, 9.022552490234375, 1.7927093505859375, -3.8281116485595703, 18.08026123046875, 0.09885978698730469, 19.812942504882812, 49.039058685302734, 35.52814865112305, 7.3801727294921875, 78.15676879882812, 16.42369842529297, -3.9476776123046875, -0.35845375061035156, 8.538955688476562, 47.521183013916016, 9.278945922851562, 1.1389694213867188, 70.11549377441406, -12.054153442382812, 41.1241455078125, 48.12663269042969, 11.629188537597656, 24.761287689208984, 7.55450439453125, 20.20008659362793, 14.981033325195312, 24.597747802734375, 69.9738998413086, 12.687355041503906, 14.522773742675781, 34.00872039794922, -4.9302978515625, 23.010414123535156, 81.93719482421875, 35.60874938964844, 17.040712356567383, 19.3355770111084, 18.864253997802734, 20.219390869140625, -2.410400390625, 31.297212600708008, 30.81406021118164, 4.125152587890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000204.npy"}
|
|
{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 23.783849716186523, "std": 23.25178337097168, "min": -24.68829345703125, "p10": -2.732733154296875, "median": 19.641587257385254, "p90": 55.9523536682129, "max": 80.36676788330078, "pos_frac": 0.84375, "sample": [0.17007827758789062, 17.924057006835938, 10.679450988769531, 56.855308532714844, 3.091686248779297, 8.89033317565918, -2.0643138885498047, 11.298980712890625, 18.75536346435547, 29.1939697265625, 29.828262329101562, -18.0081787109375, -7.0324554443359375, 30.446306228637695, -2.7972412109375, 35.11137390136719, -2.287534713745117, 79.12289428710938, 52.70109558105469, 19.56545639038086, 38.06584167480469, 18.56842041015625, 1.41082763671875, 39.35188293457031, 39.873291015625, 26.22454833984375, 47.23399353027344, 13.921489715576172, 12.123878479003906, 38.886009216308594, -4.474494934082031, 80.36676788330078, 18.777976989746094, 7.426055908203125, 6.97564697265625, 22.295631408691406, 53.845458984375, 38.21461486816406, 19.21619415283203, 20.994415283203125, -24.68829345703125, 45.15989685058594, 65.61963653564453, 26.768592834472656, 12.581138610839844, 6.878353118896484, 17.561508178710938, 30.327110290527344, 59.69230651855469, 11.86297607421875, 27.09368896484375, 65.85368347167969, 46.023441314697266, 19.71771812438965, 33.95892333984375, 43.0970458984375, 36.14076232910156, 0.772064208984375, -2.58221435546875, -10.904182434082031, 7.959266662597656, 66.44898986816406, -8.152633666992188, 34.23320770263672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000205.npy"}
|
|
{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 27.303932189941406, "std": 21.4771785736084, "min": -9.468379974365234, "p10": -1.281593322753906, "median": 24.931851387023926, "p90": 57.68658752441407, "max": 98.02444458007812, "pos_frac": 0.875, "sample": [22.318931579589844, 38.427734375, 10.591621398925781, 21.94528579711914, 98.02444458007812, 54.695960998535156, 33.69879913330078, 46.901573181152344, 37.48332214355469, 37.45599365234375, 24.217559814453125, 8.128982543945312, 25.646142959594727, 10.994354248046875, -3.04876708984375, 19.858638763427734, 31.70287322998047, 9.524978637695312, 18.025508880615234, -7.31434440612793, 36.89130401611328, 7.293113708496094, 43.63853454589844, 49.268218994140625, 40.73927307128906, 31.46820068359375, 22.088165283203125, 58.968284606933594, 11.9071044921875, -5.0654449462890625, 50.115203857421875, 60.39935302734375, 34.68971252441406, 22.350357055664062, 11.054147720336914, 48.704925537109375, 21.620285034179688, -9.468379974365234, 61.65470886230469, 15.725051879882812, -1.0990524291992188, 28.924697875976562, 14.69204330444336, 27.341354370117188, 21.671031951904297, -4.526069641113281, 20.633785247802734, 30.75171661376953, 39.09967803955078, 20.41433334350586, -3.5865306854248047, -1.3598251342773438, 30.545127868652344, 8.023048400878906, 64.27775573730469, 35.58952331542969, 8.989103317260742, 37.320316314697266, 38.260047912597656, 62.94358825683594, 7.7255096435546875, 1.7290096282958984, 69.42153930664062, 36.34817123413086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000206.npy"}
|
|
{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 28.46685028076172, "std": 25.99673843383789, "min": -12.064399719238281, "p10": 3.2557680130004885, "median": 21.327754974365234, "p90": 67.30462341308596, "max": 97.75680541992188, "pos_frac": 0.9375, "sample": [3.4512901306152344, 3.6675491333007812, 16.610939025878906, 10.251483917236328, 40.621673583984375, 3.348196029663086, 69.87466430664062, 16.185810089111328, 3.216156005859375, 1.7978134155273438, 41.065948486328125, 1.8373489379882812, 53.19285583496094, -1.0713348388671875, 17.174850463867188, 8.195699691772461, 32.76698303222656, 43.22320556640625, 3.429555892944336, 6.880697250366211, 16.431808471679688, 52.483360290527344, 9.465164184570312, 43.84653854370117, 15.738258361816406, 18.1728515625, 42.2127571105957, 23.46033477783203, -2.5497817993164062, 8.975486755371094, 61.307861328125, 42.896575927734375, 7.5936431884765625, 5.269371032714844, 27.147632598876953, 58.749603271484375, 16.476043701171875, 35.904083251953125, 7.606906890869141, 26.088394165039062, 53.897789001464844, 45.62535095214844, 7.8017730712890625, 53.73493576049805, 26.498863220214844, 26.74042510986328, 15.116676330566406, 71.39747619628906, 95.02377319335938, 43.505767822265625, 18.58758544921875, 93.55014038085938, -6.475175857543945, 30.020429611206055, 97.75680541992188, 24.552200317382812, 6.379911422729492, 29.306602478027344, 4.4890289306640625, 78.29476928710938, -12.064399719238281, 32.65777587890625, 73.28643798828125, 19.195175170898438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000207.npy"}
|
|
{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 23.237592697143555, "std": 20.613441467285156, "min": -7.67413330078125, "p10": 0.8943748474121103, "median": 18.497854232788086, "p90": 54.434728240966805, "max": 98.78385925292969, "pos_frac": 0.921875, "sample": [18.065196990966797, 19.7159423828125, -2.3447723388671875, 21.87602996826172, 8.821979522705078, 12.097576141357422, 38.81108093261719, 8.696023941040039, 35.87684631347656, 39.126712799072266, 47.99161911010742, 18.930511474609375, 14.606212615966797, 38.03996276855469, 55.182533264160156, 5.950935363769531, 98.78385925292969, 11.518486022949219, 60.10539245605469, 57.44891357421875, 7.863311767578125, 33.506996154785156, 39.640228271484375, 25.303077697753906, 27.609268188476562, 10.157981872558594, 14.123794555664062, -6.1474609375, -7.67413330078125, 45.451141357421875, -0.8304786682128906, 24.610715866088867, 6.521564483642578, 16.98505401611328, 22.003173828125, 35.811920166015625, 68.53941345214844, 7.576831817626953, 12.174064636230469, 33.2789306640625, 21.85363006591797, 8.468055725097656, 8.527572631835938, 35.07832336425781, 15.246231079101562, 52.689849853515625, 21.609434127807617, 0.478302001953125, 3.5599822998046875, 24.99560546875, -0.3373565673828125, 14.044082641601562, 55.682518005371094, 19.935836791992188, 17.68206024169922, 0.5265960693359375, 4.917163848876953, 60.230628967285156, 37.7587890625, 8.9154052734375, 33.731056213378906, 4.31364631652832, 9.739601135253906, 1.7525253295898438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000208.npy"}
|
|
{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 23.724063873291016, "std": 22.29418182373047, "min": -19.534828186035156, "p10": -4.599222564697265, "median": 19.157148361206055, "p90": 53.52809524536134, "max": 80.18209838867188, "pos_frac": 0.859375, "sample": [57.159637451171875, 26.938411712646484, 40.286617279052734, 2.3531417846679688, 46.818023681640625, 66.6576919555664, 66.79277038574219, 6.067966461181641, 18.85785675048828, 14.274284362792969, 7.189678192138672, 11.145633697509766, 42.25061798095703, 2.9891128540039062, 49.469871520996094, 36.838172912597656, 60.063758850097656, -9.038398742675781, 14.916557312011719, 48.20437240600586, 44.93213653564453, 14.37335205078125, 6.594417572021484, -12.60882568359375, 38.058433532714844, -3.8137893676757812, 2.512788772583008, 22.02625274658203, 7.334716796875, 48.99870681762695, 15.503204345703125, 29.286636352539062, -12.438636779785156, 29.695674896240234, 18.47458267211914, 19.597564697265625, -0.32404136657714844, 19.13845443725586, 80.18209838867188, 43.87178039550781, 7.125251770019531, 55.267333984375, 4.927215576171875, -7.3408355712890625, -4.9358367919921875, 6.416160583496094, 36.83319091796875, 23.56314468383789, 18.258102416992188, 43.592689514160156, 32.317718505859375, 21.781801223754883, 37.91387939453125, -19.534828186035156, 32.128326416015625, 13.790191650390625, 18.267581939697266, 13.707794189453125, 19.17584228515625, -5.3484344482421875, 48.255950927734375, 65.08277130126953, 21.185028076171875, 14.278755187988281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000209.npy"}
|
|
{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 22.228591918945312, "std": 19.132234573364258, "min": -10.617630004882812, "p10": 2.1065004348754885, "median": 18.057398796081543, "p90": 43.76594123840332, "max": 74.00839233398438, "pos_frac": 0.953125, "sample": [4.820289611816406, 7.968149185180664, 30.751220703125, 35.312686920166016, 19.381027221679688, 38.74726486206055, 43.867584228515625, -1.5572052001953125, 12.676773071289062, -6.505546569824219, 43.52877426147461, 1.102752685546875, 3.659832000732422, -10.617630004882812, 66.77224731445312, 13.093399047851562, 6.8753509521484375, 14.904953002929688, 7.210334777832031, 17.325111389160156, 39.32856750488281, 40.025840759277344, 22.62070655822754, 40.28019714355469, 4.298789978027344, 1.7796134948730469, 2.2794857025146484, 25.5257568359375, 34.20147705078125, 51.69027328491211, 18.78968620300293, 27.451385498046875, 40.62158966064453, 40.99481201171875, 9.563133239746094, 2.0323638916015625, 9.86776351928711, 15.891799926757812, 13.250356674194336, 19.072734832763672, 25.1839542388916, 36.39427185058594, 31.238174438476562, 8.256969451904297, 4.5868682861328125, 3.851806640625, 41.506500244140625, 16.470993041992188, 33.91964340209961, 15.370742797851562, 66.81834411621094, 74.00839233398438, 32.133644104003906, 27.108671188354492, 22.275611877441406, 8.204421997070312, 28.95836639404297, 6.443836212158203, 4.34004020690918, 64.62344360351562, 2.2909698486328125, 1.0330276489257812, 9.787628173828125, 48.93989562988281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000210.npy"}
|
|
{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 22.822021484375, "std": 21.04338264465332, "min": -16.61715316772461, "p10": -4.273029327392574, "median": 19.454721450805664, "p90": 51.762976074218756, "max": 73.98892211914062, "pos_frac": 0.890625, "sample": [17.28190040588379, 11.604082107543945, 57.046630859375, 35.440284729003906, 25.66695785522461, 11.733932495117188, 24.71820831298828, 32.816978454589844, 73.98892211914062, 15.47576904296875, 32.57135772705078, -14.773408889770508, 5.146472930908203, 11.764724731445312, 27.067916870117188, 52.56324768066406, 38.36468505859375, 12.891815185546875, 5.2969207763671875, 37.98844909667969, 12.047866821289062, 2.6530914306640625, 18.283981323242188, 26.518556594848633, 11.091522216796875, 0.34659576416015625, 6.844596862792969, 59.931182861328125, 16.18527603149414, 58.642547607421875, 71.76571655273438, 10.819046020507812, 13.528812408447266, 21.70386505126953, 36.13032531738281, 18.97442626953125, 6.0611572265625, 46.70914077758789, 27.820838928222656, 25.447479248046875, 47.29210662841797, 30.237533569335938, -11.123844146728516, 2.9927024841308594, 8.453523635864258, 34.82849884033203, 3.7731761932373047, -16.61715316772461, -10.457893371582031, 66.868896484375, -8.452438354492188, 27.71280860900879, 31.771278381347656, 34.17957305908203, 43.377525329589844, 49.89567565917969, 17.608787536621094, 35.25508117675781, -12.402870178222656, 33.405113220214844, 19.935016632080078, 16.634841918945312, 15.532546997070312, -6.25286865234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000211.npy"}
|
|
{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 28.72591781616211, "std": 25.697410583496094, "min": -9.192626953125, "p10": 0.007090568542482134, "median": 23.664461135864258, "p90": 61.627748107910165, "max": 103.32574462890625, "pos_frac": 0.890625, "sample": [7.720603942871094, -2.803741455078125, 91.60421752929688, 28.564559936523438, 7.797340393066406, 45.963592529296875, 10.032058715820312, 18.952682495117188, 12.678606033325195, 12.341398239135742, 47.07969665527344, 11.148521423339844, 8.222785949707031, -9.192626953125, -3.0339622497558594, 19.499923706054688, 43.775360107421875, 72.10491180419922, 38.36531066894531, 37.94976806640625, 15.33551025390625, -8.233406066894531, 23.55819320678711, 55.472930908203125, 19.423179626464844, 29.58100128173828, 64.13411712646484, 6.500213623046875, 56.48175048828125, -3.1865787506103516, 11.784126281738281, 16.226303100585938, 103.32574462890625, 23.912002563476562, 5.834270477294922, 7.226715087890625, 31.181564331054688, 21.70366668701172, 38.79594421386719, 99.27674865722656, 47.128265380859375, 28.25457000732422, 27.570650100708008, 23.770729064941406, 15.629119873046875, 22.616729736328125, -0.6869964599609375, 1.626626968383789, 62.41999053955078, 7.503574371337891, 24.532665252685547, 55.20976257324219, 51.086029052734375, 27.513565063476562, 68.3786849975586, -1.3311805725097656, 34.35552978515625, 12.910587310791016, 59.77918243408203, 50.46044158935547, 2.6080703735351562, 44.067832946777344, 4.22216796875, 51.72705078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000212.npy"}
|
|
{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 29.10615348815918, "std": 26.227977752685547, "min": -27.288284301757812, "p10": 2.064271545410157, "median": 25.437053680419922, "p90": 69.36406173706055, "max": 102.19317626953125, "pos_frac": 0.9375, "sample": [88.38034057617188, 7.12652587890625, 9.7425537109375, 2.827394485473633, 12.055206298828125, 2.91796875, 7.624574661254883, 45.24403762817383, -27.288284301757812, -3.882537841796875, 1.4646625518798828, 38.3516845703125, 35.019203186035156, -7.084037780761719, 27.461692810058594, 27.351699829101562, 1.7372188568115234, 24.848358154296875, 59.752708435058594, 46.70036315917969, 84.92529296875, 39.51116943359375, 21.5814266204834, 78.6174545288086, 20.117374420166016, 55.98652648925781, 66.36228942871094, 4.166353225708008, 9.929618835449219, 4.563020706176758, 29.507980346679688, 16.983917236328125, 26.02574920654297, 31.076433181762695, 28.889694213867188, 36.12648010253906, 20.345657348632812, 45.62828063964844, 70.6505355834961, 41.650840759277344, 12.763343811035156, 6.9800567626953125, 102.19317626953125, 6.89349365234375, 10.117935180664062, 34.20819854736328, 55.10930633544922, 74.80581665039062, 23.859619140625, 37.506805419921875, 35.08381652832031, 80.54850769042969, 48.525054931640625, 8.681072235107422, 0.3339080810546875, -0.23764991760253906, 15.519218444824219, 37.69529724121094, 10.297321319580078, 4.245208740234375, 19.142047882080078, 20.192672729492188, 47.11870574951172, 38.21340560913086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000213.npy"}
|
|
{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 23.016742706298828, "std": 24.110767364501953, "min": -30.781166076660156, "p10": 1.0703193664550792, "median": 15.336590766906738, "p90": 58.44501876831055, "max": 88.16409301757812, "pos_frac": 0.90625, "sample": [12.370285034179688, 60.33439636230469, 2.423259735107422, 7.337879180908203, 6.963617324829102, 2.627685546875, 6.352222442626953, -30.781166076660156, 57.476234436035156, 11.648576736450195, 51.83263397216797, 36.40279006958008, 23.09241485595703, -0.6521453857421875, 28.153823852539062, 14.093189239501953, 7.36505126953125, 2.3441123962402344, 6.177177429199219, 30.585670471191406, 0.6538848876953125, 12.168067932128906, 25.60663604736328, 16.325439453125, 56.61018371582031, 23.105382919311523, 25.228782653808594, 4.460386276245117, 6.341938018798828, 2.0419998168945312, 20.663333892822266, 19.382354736328125, 28.204994201660156, 68.58673095703125, 10.592315673828125, 88.16409301757812, 58.65129852294922, 25.66961669921875, -0.076324462890625, 57.87611389160156, 21.47802734375, 23.62071990966797, 57.96369934082031, 73.54167175292969, -15.701131820678711, 61.424224853515625, 9.23353385925293, 19.54711151123047, 55.22734069824219, 24.72219467163086, 10.55389404296875, 14.562347412109375, 5.872688293457031, 7.742588043212891, 14.405105590820312, 36.637664794921875, 13.370159149169922, 55.05153274536133, 16.1108341217041, 8.587936401367188, 6.794147491455078, 74.34907531738281, -5.525337219238281, -2.903423309326172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000214.npy"}
|
|
{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 24.892501831054688, "std": 23.99720573425293, "min": -20.050613403320312, "p10": 2.07691879272461, "median": 18.657175064086914, "p90": 60.667986297607435, "max": 96.96463012695312, "pos_frac": 0.90625, "sample": [19.330684661865234, 63.46508026123047, 36.71075439453125, 9.799690246582031, -0.36049652099609375, 4.618255615234375, -3.6843338012695312, 43.423667907714844, 31.292068481445312, 17.983665466308594, 19.809368133544922, 10.893369674682617, 7.961282730102539, 17.775407791137695, 79.54534912109375, 35.153221130371094, 29.70209503173828, 50.63105773925781, 92.05873107910156, 31.436166763305664, 2.6168670654296875, 31.198394775390625, 20.02745819091797, 3.6434326171875, 13.42730712890625, 14.035301208496094, -0.2033977508544922, 15.263191223144531, 57.6002311706543, 11.02923583984375, 4.889247894287109, 17.02964210510254, 13.14498519897461, 17.359880447387695, 61.98273849487305, 27.022790908813477, 1.8455123901367188, 53.33233642578125, 42.726470947265625, 22.926223754882812, 67.0540542602539, 25.437400817871094, 26.795459747314453, 17.528213500976562, 66.73684692382812, 15.611114501953125, 35.389427185058594, 5.185829162597656, 12.765087127685547, 50.02577209472656, -20.050613403320312, 34.33409881591797, 96.96463012695312, 7.425647735595703, 22.8717041015625, -13.054656982421875, 42.7899169921875, 6.066192626953125, 4.733577728271484, 24.959867477416992, 27.937063217163086, 4.363811492919922, -9.055063247680664, 11.861770629882812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000215.npy"}
|
|
{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 26.958616256713867, "std": 26.480247497558594, "min": -17.63250732421875, "p10": -0.8274059295654292, "median": 23.573158264160156, "p90": 67.80697326660157, "max": 91.19178771972656, "pos_frac": 0.875, "sample": [35.76905822753906, 33.68310546875, -12.441654205322266, 32.21778106689453, 31.8660888671875, 16.148340225219727, -0.9903068542480469, 24.74536895751953, 49.34698486328125, 1.7139816284179688, 91.19178771972656, 3.5245513916015625, 32.903934478759766, -6.4858245849609375, 10.42233657836914, 10.422409057617188, 1.993825912475586, 45.4633903503418, 5.148731231689453, -9.50836181640625, 6.162773132324219, 73.5987548828125, 6.9838104248046875, 38.83204650878906, 61.45287322998047, 22.712982177734375, 5.0842132568359375, 46.646907806396484, 57.23625564575195, 28.36760139465332, 30.096649169921875, 31.8194580078125, 24.433334350585938, 3.723329544067383, 69.14627075195312, 30.046844482421875, -17.63250732421875, 79.35774230957031, 5.479240417480469, 15.950386047363281, 3.249876022338867, 36.143096923828125, 13.185955047607422, -0.44730377197265625, 21.289901733398438, 28.8001708984375, 59.28229522705078, 79.54502868652344, 11.703033447265625, 1.449066162109375, 70.58522033691406, 16.027992248535156, 3.2765655517578125, -1.0146598815917969, 11.528009414672852, 55.87567138671875, 62.079505920410156, 40.20903015136719, 64.68194580078125, 78.29891967773438, 15.992813110351562, 0.6772098541259766, -3.5839061737060547, 39.881553649902344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000216.npy"}
|
|
{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 27.929889678955078, "std": 21.522830963134766, "min": -17.742835998535156, "p10": 3.8765695571899417, "median": 27.046737670898438, "p90": 52.28611679077149, "max": 85.60844421386719, "pos_frac": 0.921875, "sample": [20.89208984375, 29.249290466308594, 29.294723510742188, 3.812681198120117, 7.85546875, 12.588241577148438, 22.343229293823242, 43.48637390136719, 28.4556884765625, 13.176116943359375, -0.5269012451171875, 77.51924133300781, 47.089927673339844, 39.576385498046875, -10.445777893066406, 15.933971405029297, 19.778736114501953, -17.742835998535156, 33.21563720703125, 83.07131958007812, -2.217254638671875, 18.082612991333008, 20.37531280517578, 27.846954345703125, 52.33399200439453, 32.896942138671875, 28.722579956054688, 4.025642395019531, 7.1475067138671875, 33.67303466796875, 85.60844421386719, 41.126792907714844, 7.432275772094727, 16.561077117919922, 48.24644470214844, 25.811038970947266, 26.24652099609375, 52.489051818847656, 17.474559783935547, 51.49058532714844, 44.3406982421875, 2.0224075317382812, 56.57969665527344, 42.14141845703125, 22.585041046142578, 52.174407958984375, 14.599136352539062, 35.98456573486328, 36.149070739746094, 32.498382568359375, 30.240951538085938, 35.929954528808594, 81.65815734863281, 24.199533462524414, 42.982669830322266, 8.452293395996094, 10.190689086914062, 9.678947448730469, 11.306541442871094, 10.403060913085938, 18.086715698242188, 36.06449890136719, 36.62120056152344, -1.3749198913574219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000217.npy"}
|
|
{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 24.937423706054688, "std": 24.085628509521484, "min": -10.532302856445312, "p10": 0.6269290924072269, "median": 19.347110748291016, "p90": 54.90351028442384, "max": 114.82046508789062, "pos_frac": 0.90625, "sample": [24.881786346435547, 12.814323425292969, 17.51386260986328, 19.630489349365234, 41.15424346923828, 21.14805793762207, 77.85220336914062, 2.7710494995117188, 8.155097961425781, 19.099990844726562, 52.32189178466797, -3.8540267944335938, 12.088459014892578, 47.81861114501953, 71.12263488769531, 12.52484130859375, 25.923553466796875, 9.631534576416016, 5.364097595214844, 29.640159606933594, 9.812789916992188, 19.59423065185547, 9.313312530517578, 41.339691162109375, 7.299995422363281, -8.39739990234375, 50.600074768066406, 45.72340393066406, 33.379249572753906, 39.896446228027344, 14.244218826293945, 9.21575927734375, 23.631744384765625, 51.39904022216797, 39.54937744140625, 43.2432861328125, 6.157098770141602, 0.4721488952636719, 29.160306930541992, 3.180675506591797, 19.077335357666016, -10.532302856445312, 56.009918212890625, 8.938884735107422, -1.6817092895507812, -6.0142059326171875, 114.82046508789062, 51.915740966796875, 43.00019836425781, 13.814079284667969, 3.6641387939453125, 58.6177978515625, 60.65142822265625, 1.4807682037353516, 10.488201141357422, 34.20447540283203, 66.95352935791016, 28.42537498474121, 6.48307991027832, -9.701667785644531, 16.071929931640625, 0.9880828857421875, 31.788238525390625, 20.113014221191406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000218.npy"}
|
|
{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 22.00741958618164, "std": 19.767765045166016, "min": -14.735198974609375, "p10": 1.2967185974121098, "median": 17.557645797729492, "p90": 57.9971923828125, "max": 69.06327819824219, "pos_frac": 0.9375, "sample": [69.06327819824219, 26.962175369262695, 11.994518280029297, 62.32878875732422, 8.515775680541992, 57.53736877441406, -14.735198974609375, 3.365720748901367, 1.131591796875, 35.571075439453125, 11.797792434692383, 16.88451385498047, 29.077835083007812, 68.71771240234375, 62.549537658691406, 14.639543533325195, 0.13486480712890625, 58.19425964355469, 48.011962890625, 26.508193969726562, 16.81304168701172, 7.05224609375, -13.217737197875977, 17.626827239990234, 29.64806365966797, 6.692008972167969, 25.929054260253906, 8.220829010009766, 59.63377380371094, 23.643531799316406, 13.540882110595703, 62.886260986328125, 17.9219970703125, 7.653547286987305, 18.087615966796875, 37.68799591064453, 31.78461456298828, 1.6820144653320312, 24.848976135253906, 9.841583251953125, 33.97310256958008, 15.305976867675781, 30.42066192626953, 6.568115234375, 11.68310546875, 17.48846435546875, 11.410369873046875, 9.651079177856445, 2.4006423950195312, 39.803009033203125, 17.950042724609375, 29.0167236328125, 10.459953308105469, -3.5826759338378906, -3.7217254638671875, 41.880401611328125, 20.727493286132812, 28.536598205566406, 15.833698272705078, 0.441986083984375, 8.305633544921875, 25.172584533691406, 23.554166793823242, 8.966920852661133], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000219.npy"}
|
|
{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 20.889972686767578, "std": 17.149866104125977, "min": -4.3613433837890625, "p10": -0.41230125427246034, "median": 17.606765747070312, "p90": 47.81003112792969, "max": 59.024261474609375, "pos_frac": 0.890625, "sample": [55.46270751953125, 6.541515350341797, 16.49730110168457, 2.1822128295898438, 26.402650833129883, 47.652366638183594, 59.024261474609375, -0.8940353393554688, 28.82701873779297, 28.482521057128906, 21.94068145751953, 11.300010681152344, 45.641387939453125, 10.941566467285156, -0.6484489440917969, 13.304168701171875, 58.58219909667969, 27.20220375061035, 33.837242126464844, 36.5300407409668, 43.852874755859375, 2.8043441772460938, 13.140266418457031, 10.432754516601562, -4.3613433837890625, 47.877601623535156, 11.997079849243164, 16.858535766601562, 52.0391845703125, 20.803112030029297, 28.385391235351562, 18.354995727539062, 32.805233001708984, 10.589698791503906, 31.623794555664062, 28.79942512512207, 19.789993286132812, 54.43290710449219, 7.1971435546875, 13.02728271484375, 1.2664775848388672, 3.7135696411132812, 0.13871002197265625, 14.801437377929688, 3.7013187408447266, -1.8406829833984375, 36.958526611328125, 12.590774536132812, 1.5791969299316406, 14.869743347167969, 8.616508483886719, 21.191497802734375, 51.45935821533203, -2.181917190551758, 20.314151763916016, 36.4516716003418, 20.773120880126953, 27.09947967529297, 21.766326904296875, 12.116424560546875, 15.070323944091797, 29.311386108398438, -0.878143310546875, -1.190948486328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000220.npy"}
|
|
{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 32.81925582885742, "std": 22.986146926879883, "min": -12.770103454589844, "p10": 5.0504592895507825, "median": 29.558914184570312, "p90": 64.3977119445801, "max": 87.86102294921875, "pos_frac": 0.953125, "sample": [25.850967407226562, 16.09912109375, 29.398147583007812, 8.696676254272461, 30.889493942260742, 87.86102294921875, 24.9345703125, 84.06724548339844, 42.63178253173828, 59.83679962158203, 66.00720977783203, 0.9115409851074219, 68.92564392089844, 13.6240234375, 27.313753128051758, 53.84666442871094, 8.179229736328125, 58.037689208984375, 18.642065048217773, 47.88336944580078, 53.05713653564453, 16.126907348632812, 37.291954040527344, 71.41612243652344, 34.61804962158203, 54.967018127441406, 52.98655700683594, -8.215850830078125, 17.940261840820312, 11.159614562988281, 29.719680786132812, 28.05913543701172, 4.664058685302734, 28.59119415283203, 26.816360473632812, 30.905624389648438, 13.869760513305664, -12.770103454589844, 28.733169555664062, 55.62297058105469, 5.947601318359375, 65.48956298828125, 41.95037078857422, 33.11865234375, 35.250099182128906, 29.96288299560547, 14.806365966796875, 77.92547607421875, 28.0970458984375, 33.94647216796875, 22.871679306030273, -12.437524795532227, 32.88641357421875, 54.75917053222656, 4.6659698486328125, 12.144298553466797, 23.580408096313477, 43.25206756591797, 16.778762817382812, 26.755996704101562, 61.850059509277344, 4.056631088256836, 61.17559814453125, 32.40168762207031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000221.npy"}
|
|
{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 26.146482467651367, "std": 25.172771453857422, "min": -27.625926971435547, "p10": 0.5465541839599618, "median": 20.362693786621094, "p90": 63.852415084838874, "max": 105.46268463134766, "pos_frac": 0.90625, "sample": [4.1250457763671875, 53.767181396484375, 19.089599609375, 11.364892959594727, 7.063117980957031, 20.12663459777832, 6.02337646484375, 5.863006591796875, 12.300384521484375, 2.070831298828125, 20.55579376220703, 40.62813186645508, 105.46268463134766, 19.507030487060547, 41.42816162109375, 17.562965393066406, -8.768516540527344, 19.426673889160156, 59.77168273925781, 21.164443969726562, 36.191864013671875, 24.283470153808594, 7.9422607421875, 67.136474609375, -5.312553405761719, 29.610923767089844, 79.89579772949219, 18.504051208496094, 45.10877227783203, 3.9720497131347656, 46.416404724121094, 2.7089996337890625, 62.08864974975586, -0.6708469390869141, 27.154373168945312, 39.39112854003906, 3.9000492095947266, 26.56795310974121, 0.18031692504882812, 27.6466064453125, 1.4011077880859375, 31.341781616210938, -14.701560974121094, -6.8629913330078125, 77.08380126953125, 16.37323760986328, 38.032630920410156, 64.60831451416016, 11.217931747436523, 22.615615844726562, 51.503456115722656, 41.35834503173828, 66.32760620117188, 17.85599136352539, 2.201873779296875, 25.324745178222656, 37.34077453613281, -27.625926971435547, 31.58203125, 20.169593811035156, 44.113555908203125, 19.19940185546875, 68.69171142578125, 12.972038269042969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000222.npy"}
|
|
{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 22.292381286621094, "std": 22.9637451171875, "min": -23.31121063232422, "p10": -6.359392738342283, "median": 18.846364974975586, "p90": 51.67638244628906, "max": 77.1607666015625, "pos_frac": 0.828125, "sample": [10.247116088867188, -22.246612548828125, 33.32939910888672, 22.365066528320312, -1.5476112365722656, 29.115188598632812, 53.67364501953125, 15.507705688476562, 24.495254516601562, 43.38078308105469, 11.516695022583008, 8.7447509765625, 15.275222778320312, 38.360382080078125, 62.16981506347656, 38.066986083984375, 18.91522979736328, 23.47964096069336, 77.1607666015625, 6.289157867431641, 38.902748107910156, -7.420648574829102, 36.33232879638672, -22.549274444580078, 12.484539031982422, 50.2254638671875, -0.6914100646972656, 51.822601318359375, 44.374786376953125, -14.826454162597656, 8.957656860351562, 5.721866607666016, 56.83622741699219, -17.584564208984375, 31.227588653564453, 49.3392333984375, 2.261688232421875, 29.56853485107422, 18.77750015258789, 71.64593505859375, 49.83423614501953, -3.883129119873047, 15.119871139526367, 18.531341552734375, 10.6959228515625, 11.136104583740234, 36.17332458496094, 51.335205078125, 13.667823791503906, 11.280242919921875, 24.635847091674805, -0.17233848571777344, -12.238067626953125, 55.4246826171875, 15.291946411132812, 34.3952751159668, 4.785234451293945, 11.275251388549805, 13.870292663574219, 41.989410400390625, 27.426435470581055, 30.999420166015625, -23.31121063232422, 34.74440002441406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000223.npy"}
|
|
{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 26.778461456298828, "std": 25.32072639465332, "min": -29.02960968017578, "p10": 0.05366420745849622, "median": 21.03948402404785, "p90": 66.3917678833008, "max": 86.04985046386719, "pos_frac": 0.90625, "sample": [38.67302703857422, 7.130523681640625, 10.546218872070312, 10.187271118164062, -0.5490455627441406, 57.44561004638672, 33.11815643310547, 7.55134391784668, 22.600143432617188, -29.02960968017578, 35.12175750732422, 74.7192611694336, 57.207645416259766, 19.822839736938477, 73.25634765625, 16.425819396972656, 31.963882446289062, 51.71648406982422, 30.473445892333984, 86.04985046386719, 60.04676818847656, 28.290191650390625, 42.08972930908203, 53.660865783691406, 13.705123901367188, -10.752376556396484, 29.22191619873047, 24.860605239868164, 47.499916076660156, 0.23942947387695312, 11.898162841796875, 17.871742248535156, 17.001033782958984, 71.22137451171875, 38.45256042480469, 45.41845703125, 16.86627960205078, 1.1301956176757812, 10.378105163574219, 21.042308807373047, 51.4593391418457, -6.318572998046875, 5.930961608886719, 0.0013675689697265625, 21.535707473754883, -14.62397575378418, 10.834442138671875, 8.075721740722656, 6.865119934082031, 20.584909439086914, 28.487342834472656, 48.63492965698242, 78.2537841796875, 13.218042373657227, 4.778425216674805, 10.35042953491211, 21.036659240722656, 33.461875915527344, 13.676738739013672, 0.175689697265625, 35.38917541503906, 80.48797607421875, 69.11105346679688, -2.159008026123047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000224.npy"}
|
|
{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 32.95307922363281, "std": 26.78766632080078, "min": -11.536447525024414, "p10": 1.7362293243408218, "median": 29.352563858032227, "p90": 57.44227752685548, "max": 129.77105712890625, "pos_frac": 0.921875, "sample": [68.2926025390625, 48.717559814453125, 64.57060241699219, 83.85316467285156, 3.231609344482422, 11.008926391601562, 4.228340148925781, 22.7686767578125, 31.4437255859375, 53.069305419921875, 29.003883361816406, 52.29090118408203, 1.0953521728515625, 3.9480667114257812, 52.260009765625, 15.379268646240234, -11.536447525024414, 10.035085678100586, 0.3580818176269531, 9.750785827636719, 36.61107635498047, 35.51648712158203, 41.85675811767578, 55.04022979736328, 3.6606197357177734, 6.779655456542969, 55.386016845703125, 21.123451232910156, 28.48157501220703, 22.02393341064453, 82.06373596191406, 40.48116683959961, 52.96049499511719, 24.84674072265625, -9.013656616210938, 95.07522583007812, 44.560638427734375, -4.661979675292969, 53.72504425048828, 52.883148193359375, 29.326374053955078, 45.09394836425781, 12.853645324707031, 3.428800582885742, 48.97346496582031, 15.794242858886719, 129.77105712890625, 37.99873352050781, 34.5113525390625, 25.208038330078125, 55.03532409667969, 27.387603759765625, 49.470420837402344, 14.496978759765625, 9.764598846435547, 58.32353210449219, 45.95062255859375, 28.316818237304688, 43.838966369628906, -4.584930419921875, 25.41686248779297, 53.16542434692383, -3.093414306640625, 29.378753662109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000225.npy"}
|
|
{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 25.563575744628906, "std": 22.32744026184082, "min": -19.016036987304688, "p10": 2.3221681594848635, "median": 19.539816856384277, "p90": 59.095331573486334, "max": 87.50688171386719, "pos_frac": 0.90625, "sample": [87.50688171386719, -5.5353546142578125, 4.926792144775391, 32.05493927001953, 2.604795455932617, 55.26925277709961, 14.431015014648438, 29.662033081054688, 19.743553161621094, 2.2010421752929688, 35.739158630371094, 7.098320007324219, -19.016036987304688, 7.673698425292969, -1.8499755859375, 34.37754440307617, 16.221378326416016, 24.7567138671875, 43.89540100097656, 11.98797607421875, 40.81626892089844, 65.54747009277344, 25.614181518554688, 28.97394561767578, 6.8004302978515625, 44.01776123046875, 19.564064025878906, 24.594680786132812, 19.120376586914062, 61.76673889160156, 33.58465576171875, 7.571466445922852, 40.666053771972656, 26.46092414855957, 20.1420841217041, 53.174774169921875, 55.21435546875, 65.17533874511719, 59.63123321533203, -1.0248565673828125, 7.6901397705078125, 7.095359802246094, -5.460002899169922, 17.514984130859375, 14.172744750976562, -4.694549560546875, 16.20677947998047, 8.787178039550781, 18.351207733154297, 18.575706481933594, 9.126007080078125, 69.48262023925781, 27.914871215820312, 36.996192932128906, 19.51556968688965, 47.40106201171875, 12.78363037109375, 3.964609146118164, 76.24415588378906, 15.22576904296875, 14.839027404785156, 14.497346878051758, 30.832382202148438, 57.84489440917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000226.npy"}
|
|
{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 31.536819458007812, "std": 26.311187744140625, "min": -12.12961196899414, "p10": 0.24247131347656325, "median": 24.61126136779785, "p90": 68.75740356445313, "max": 100.32586669921875, "pos_frac": 0.890625, "sample": [82.7899169921875, 18.978195190429688, 22.66425323486328, 78.14998626708984, 38.230140686035156, 38.46428680419922, 42.322662353515625, 47.58306884765625, 40.715797424316406, 2.0978660583496094, 11.890853881835938, 36.270050048828125, 22.86896514892578, -1.5636825561523438, -1.0010948181152344, 14.858314514160156, 49.99744415283203, 8.156150817871094, 37.06141662597656, 100.32586669921875, 36.20649719238281, 17.790874481201172, 15.617008209228516, 13.47109603881836, 22.222965240478516, 50.36415100097656, 59.039817810058594, 35.335044860839844, 37.01271057128906, 27.426136016845703, 38.951454162597656, 52.37683868408203, 42.61029052734375, 2.6134109497070312, -3.7937774658203125, 3.364349365234375, 18.633193969726562, 47.2579345703125, 66.43878173828125, 24.521697998046875, 24.700824737548828, 69.7510986328125, -12.12961196899414, 98.65560913085938, 8.635053634643555, 17.984817504882812, 24.195905685424805, 15.316558837890625, 36.95960235595703, 19.838821411132812, 30.902976989746094, 81.77389526367188, 87.0303955078125, 18.228683471679688, -2.5243988037109375, -0.08241844177246094, 59.15348815917969, -9.579742431640625, 1.0005474090576172, 16.444076538085938, 13.302820205688477, 19.517911911010742, 40.32093048095703, 60.637603759765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000227.npy"}
|
|
{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 24.27182960510254, "std": 20.918087005615234, "min": -19.869964599609375, "p10": -1.385177993774413, "median": 23.772400856018066, "p90": 52.129859924316406, "max": 77.79234313964844, "pos_frac": 0.875, "sample": [10.281929016113281, -19.869964599609375, -4.473785400390625, 27.381729125976562, 36.12739562988281, 77.79234313964844, -9.570411682128906, 21.372867584228516, 13.471792221069336, -1.8542518615722656, 7.319915771484375, 21.25958251953125, 52.30987548828125, 35.08483123779297, 64.10169982910156, 70.59597778320312, 58.33555603027344, 24.702198028564453, 31.49262809753418, 10.918960571289062, -6.506675720214844, 43.76287078857422, 18.055885314941406, 26.029067993164062, 38.55510711669922, 13.511505126953125, 26.660934448242188, 9.291830062866211, -0.29067230224609375, 43.20137023925781, 48.216773986816406, 17.241233825683594, 5.872919082641602, 55.111297607421875, 35.1463623046875, 10.385489463806152, 10.292510986328125, 17.23284149169922, 3.71551513671875, 38.068634033203125, 23.469552993774414, 24.07524871826172, 27.593307495117188, 13.145774841308594, 21.968093872070312, 28.75890350341797, 51.70982360839844, 1.7380218505859375, 30.951385498046875, 1.9208526611328125, 69.73886108398438, 19.24881362915039, -10.387725830078125, 33.00274658203125, 30.90630340576172, 35.01060485839844, 33.70381164550781, 27.405357360839844, 17.838829040527344, 49.458953857421875, 1.2676162719726562, 30.300155639648438, -5.191925048828125, 15.428062438964844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000228.npy"}
|
|
{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 27.103031158447266, "std": 25.559297561645508, "min": -31.225284576416016, "p10": 1.4650747299194342, "median": 21.140487670898438, "p90": 59.60950546264649, "max": 102.23683166503906, "pos_frac": 0.921875, "sample": [8.830154418945312, 5.075067520141602, 17.482269287109375, 57.83171081542969, 19.975482940673828, -16.10281753540039, 7.969779968261719, 53.111175537109375, 21.12786102294922, 52.051841735839844, 78.21507263183594, 59.010704040527344, 30.598281860351562, 62.664695739746094, 38.48722839355469, 32.790382385253906, 17.578392028808594, 18.92241668701172, 20.13705825805664, 62.652008056640625, 2.1920032501220703, 22.069923400878906, 6.591747283935547, 38.16071701049805, 9.303071975708008, 18.840373992919922, 102.23683166503906, -14.2669677734375, 12.443290710449219, 48.573944091796875, 33.51830291748047, 46.32921600341797, 33.13707733154297, 58.93275451660156, 66.33869934082031, 23.196962356567383, 65.32774353027344, 16.083518981933594, 15.35528564453125, -0.2688446044921875, 36.16985321044922, 3.0402984619140625, 8.823837280273438, 23.13678550720215, 2.5920066833496094, 21.153114318847656, 52.86979675292969, 52.99947738647461, 1.153533935546875, -31.225284576416016, 20.460290908813477, 59.86613464355469, 16.0504093170166, 22.66082763671875, 44.86668395996094, 36.62818908691406, 3.9892501831054688, 47.424835205078125, 2.843048095703125, 58.04214859008789, -17.49127197265625, 0.7612552642822266, 2.7063140869140625, 12.56805419921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000229.npy"}
|
|
{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 30.37428092956543, "std": 27.951574325561523, "min": -30.670623779296875, "p10": 1.9100021362304704, "median": 25.047624588012695, "p90": 76.90207901000977, "max": 98.85956573486328, "pos_frac": 0.921875, "sample": [18.136932373046875, 62.864845275878906, 7.042497634887695, 57.144981384277344, 20.92353057861328, 23.7531681060791, 27.184659957885742, 38.24927520751953, 20.66701316833496, 11.018383026123047, 26.119617462158203, 37.747291564941406, -25.054542541503906, 35.69775390625, 41.715240478515625, 24.7413330078125, -3.7394561767578125, 31.755584716796875, 4.0427093505859375, 27.020565032958984, 16.898406982421875, -5.934709548950195, 0.38605499267578125, 29.495227813720703, 9.768848419189453, 18.717212677001953, 18.013145446777344, 41.024391174316406, 56.45806884765625, 31.511985778808594, 18.99592399597168, 14.555294036865234, 16.62738037109375, 76.00582885742188, 47.35818099975586, 3.4720458984375, 27.36842918395996, 41.81475830078125, 36.41245651245117, 87.36439514160156, -0.2540626525878906, 1.2405548095703125, -30.670623779296875, 16.82897186279297, 19.07270050048828, 21.291797637939453, 77.28618621826172, 28.411771774291992, 91.3683853149414, 54.564910888671875, 28.838367462158203, 16.50790786743164, 25.35391616821289, 5.310323715209961, 80.9263916015625, 22.655014038085938, 98.85956573486328, 14.188644409179688, 96.21284484863281, 40.015846252441406, 60.44758605957031, 4.723106384277344, 8.919998168945312, 88.50918579101562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000230.npy"}
|
|
{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 26.028377532958984, "std": 25.084552764892578, "min": -6.858421325683594, "p10": 3.54089698791504, "median": 16.68770980834961, "p90": 62.88970031738282, "max": 111.73956298828125, "pos_frac": 0.9375, "sample": [111.73956298828125, 33.92093276977539, 19.502174377441406, 35.660125732421875, 8.354957580566406, 27.31232452392578, 65.06013488769531, 6.340736389160156, 15.751523971557617, 70.0824203491211, 7.757053375244141, 7.4480743408203125, 8.853141784667969, 26.795421600341797, 82.66180419921875, 15.890840530395508, 30.497947692871094, 46.801273345947266, 41.26776885986328, 42.09718322753906, 67.57093048095703, 27.49972915649414, 13.985923767089844, 54.4727783203125, 8.483675003051758, 7.381797790527344, 35.18177795410156, 0.9001197814941406, 63.606109619140625, 94.11079406738281, 61.21807861328125, 60.89402770996094, 20.817718505859375, 10.675527572631836, -6.858421325683594, 10.100852966308594, 12.236007690429688, 43.69115447998047, 8.134323120117188, 4.488189697265625, 10.136672973632812, 11.482202529907227, 23.434646606445312, 5.690887451171875, 6.401939392089844, 6.722881317138672, 30.0111083984375, 4.890565872192383, 3.1349143981933594, 18.65673828125, 45.98271179199219, 35.79540252685547, 1.3991432189941406, 8.542125701904297, -1.9079608917236328, 52.523250579833984, -3.2178688049316406, 12.56039047241211, -0.2533721923828125, 17.359710693359375, 9.08782958984375, 27.9473876953125, 21.03266143798828, 16.015708923339844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000231.npy"}
|
|
{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 21.21156120300293, "std": 23.13524055480957, "min": -16.716339111328125, "p10": -6.193288421630858, "median": 17.532981872558594, "p90": 50.789568328857435, "max": 104.99148559570312, "pos_frac": 0.8125, "sample": [31.340545654296875, 31.062454223632812, 1.7677841186523438, 48.28233337402344, 3.27655029296875, 20.34667205810547, 36.440185546875, 38.86700439453125, 7.7740478515625, 57.51078796386719, -1.7901763916015625, 29.84790802001953, 38.759666442871094, -10.738121032714844, -4.824310302734375, 3.1395416259765625, 18.041473388671875, 37.383155822753906, -10.955215454101562, 15.019515991210938, 37.97840118408203, 69.13893127441406, -3.19940185546875, 31.063430786132812, 24.117095947265625, 7.516521453857422, 64.175048828125, -16.716339111328125, 10.328170776367188, 51.864097595214844, 21.85289764404297, 15.083793640136719, 10.428863525390625, 17.024490356445312, -5.0508270263671875, 32.34089660644531, -6.754646301269531, 2.076108932495117, 22.561447143554688, 104.99148559570312, 26.481201171875, -6.682914733886719, 18.384563446044922, -9.539073944091797, 14.128040313720703, 14.082704544067383, 37.05839538574219, -8.701696395874023, 36.89530944824219, -0.5399761199951172, 67.64466857910156, 8.138629913330078, 9.411026000976562, 43.455177307128906, 59.20771026611328, 31.539533615112305, 32.95642852783203, 31.66387939453125, 5.4121246337890625, 15.400177001953125, 5.5055694580078125, 17.009437561035156, 23.331436157226562, 3.9253921508789062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000232.npy"}
|
|
{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 27.884883880615234, "std": 26.594215393066406, "min": -23.31106185913086, "p10": -1.431148910522458, "median": 22.731292724609375, "p90": 60.7330436706543, "max": 130.25161743164062, "pos_frac": 0.890625, "sample": [7.6144866943359375, 13.579265594482422, 60.072265625, 22.03753662109375, 14.22787857055664, 37.936363220214844, -7.617069244384766, -10.160179138183594, 6.3116607666015625, -23.31106185913086, 17.27008056640625, 33.37453079223633, 6.843055725097656, 45.957801818847656, 76.01849365234375, 71.03666687011719, 22.75896453857422, 17.23178482055664, 47.578147888183594, 59.07749938964844, 22.70362091064453, 40.85392761230469, 15.513084411621094, 60.460487365722656, 11.878425598144531, 34.509761810302734, 14.655937194824219, -10.362548828125, -7.939048767089844, 2.0049171447753906, 48.328895568847656, 40.497955322265625, 26.932113647460938, 49.57732391357422, 35.987701416015625, 58.11578369140625, 1.756561279296875, 52.26979064941406, 8.513290405273438, 25.477598190307617, 36.58088684082031, 61.548858642578125, 11.54037094116211, 28.3134765625, 18.551658630371094, 60.849853515625, 13.660053253173828, -12.891517639160156, 1.5022506713867188, 4.7104644775390625, 13.079437255859375, 25.074935913085938, 62.30387878417969, 40.04412841796875, 6.4405059814453125, 20.267257690429688, 9.864749908447266, 49.168357849121094, -2.6883201599121094, 130.25161743164062, 67.02566528320312, 15.299240112304688, 33.151702880859375, 41.409446716308594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000233.npy"}
|
|
{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 28.14922332763672, "std": 22.41670799255371, "min": -35.81159973144531, "p10": 3.048023796081543, "median": 23.51495361328125, "p90": 60.15936355590822, "max": 89.17666625976562, "pos_frac": 0.9375, "sample": [-0.12805938720703125, 45.01659393310547, 53.0279541015625, 16.56536865234375, 37.77420425415039, 45.35387420654297, 30.782882690429688, 36.0564079284668, 53.65283966064453, 52.87040710449219, 15.699676513671875, 11.580001831054688, -35.81159973144531, 11.64095687866211, 13.671958923339844, 27.476234436035156, 6.213706970214844, 13.291267395019531, 19.6119384765625, 44.74420166015625, 75.23682403564453, 55.01060485839844, 8.691572189331055, 14.368003845214844, 24.247482299804688, 2.973968505859375, 43.62879943847656, 27.054046630859375, 13.196403503417969, 64.57351684570312, 3.013141632080078, 40.88860321044922, 62.36597442626953, 46.29887771606445, 89.17666625976562, 22.782424926757812, 26.86590576171875, 40.72896957397461, 18.58498764038086, 21.307662963867188, 35.97509765625, 41.788909912109375, 47.88642120361328, 15.55903434753418, 3.129415512084961, 14.87564468383789, 14.525650024414062, 2.5742568969726562, 33.234134674072266, 15.951595306396484, 15.899421691894531, 67.45762634277344, 64.2056884765625, -10.657821655273438, 21.230384826660156, 22.449462890625, 6.962715148925781, 37.44349670410156, 27.749801635742188, 19.774429321289062, 27.62107276916504, -5.7118072509765625, 63.05043029785156, 18.490020751953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000234.npy"}
|
|
{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 22.86014175415039, "std": 22.96712875366211, "min": -18.03559112548828, "p10": -1.917087554931639, "median": 16.808202743530273, "p90": 51.13296737670898, "max": 89.03157043457031, "pos_frac": 0.84375, "sample": [11.099403381347656, 28.130233764648438, 2.7014923095703125, 19.45508575439453, 29.828645706176758, 89.03157043457031, 12.226516723632812, 57.759552001953125, 19.108951568603516, 32.62538146972656, -18.03559112548828, -0.381805419921875, 7.0638885498046875, 45.859130859375, 12.44097900390625, -9.005859375, 18.879135131835938, 5.598335266113281, 28.38593292236328, -5.225563049316406, -3.1031570434570312, 8.4189453125, 11.957620620727539, -9.05517578125, 50.70594024658203, 50.684326171875, 10.3232421875, -0.152252197265625, 39.50914001464844, 52.28855895996094, 30.213302612304688, 18.880783081054688, 40.67039489746094, 13.035209655761719, 17.865985870361328, 29.54546356201172, 19.08954429626465, 47.96855926513672, 30.555076599121094, 28.823333740234375, 14.848159790039062, 43.33160400390625, 67.67401123046875, 10.211273193359375, 50.1212158203125, 35.80609130859375, 46.209251403808594, -2.5750656127929688, 4.881034851074219, 10.385841369628906, 75.83930969238281, 51.31597900390625, 2.6906166076660156, -0.1637420654296875, 10.659000396728516, -4.958616256713867, 5.4644317626953125, 15.750419616699219, 13.000091552734375, 5.533315658569336, 24.164337158203125, 15.738666534423828, 83.18130493164062, 8.170295715332031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000235.npy"}
|
|
{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 25.48858070373535, "std": 23.67022132873535, "min": -16.89471435546875, "p10": -2.0655002593994136, "median": 22.449556350708008, "p90": 63.003779602050784, "max": 81.48548889160156, "pos_frac": 0.859375, "sample": [64.44183349609375, 3.6033554077148438, -8.08538818359375, 36.358787536621094, 31.20827865600586, 28.72631072998047, 42.2591552734375, 7.81031608581543, 21.603702545166016, 17.970138549804688, 27.85357666015625, 76.50970458984375, 18.167741775512695, 37.901084899902344, 23.907020568847656, 35.52124786376953, 16.273611068725586, 23.29541015625, -16.89471435546875, 81.48548889160156, 15.977760314941406, -5.5594635009765625, 3.085531234741211, 67.71884155273438, 44.18745803833008, 5.0084686279296875, 10.733436584472656, 72.853515625, 42.656524658203125, 9.303976058959961, 34.32780456542969, 8.486246109008789, -4.5068359375, 35.638031005859375, 21.18924903869629, 30.394115447998047, -14.19903564453125, 64.91039276123047, 29.05347442626953, 10.102785110473633, 23.35205078125, 11.102226257324219, 13.520029067993164, 1.7263946533203125, -0.6653804779052734, 62.87004089355469, 20.217754364013672, 7.9239044189453125, 3.984180450439453, 11.094562530517578, 41.90711975097656, 61.81739807128906, 51.47492980957031, -3.124736785888672, 47.98052978515625, 29.40518569946289, 63.06109619140625, 39.852867126464844, 0.7748031616210938, 15.01397705078125, -1.798431396484375, 31.753677368164062, -2.1799583435058594, 48.92604064941406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000236.npy"}
|
|
{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 27.525650024414062, "std": 24.088449478149414, "min": -30.515464782714844, "p10": 1.8745109558105468, "median": 24.824689865112305, "p90": 65.76077270507814, "max": 83.3187026977539, "pos_frac": 0.90625, "sample": [24.154342651367188, 24.814258575439453, 76.11864471435547, 55.62853240966797, 1.8664932250976562, 38.45171356201172, 26.684890747070312, 28.698341369628906, 9.160449981689453, 18.20782470703125, 9.748199462890625, 21.01192855834961, 63.00428771972656, 18.072921752929688, 66.94212341308594, 30.35034942626953, 8.53164291381836, 42.75419616699219, 1.893218994140625, 43.031883239746094, 44.91917419433594, 5.323173522949219, 77.12274169921875, 72.34768676757812, -2.816009521484375, 83.3187026977539, 11.164382934570312, 11.717765808105469, 29.24073028564453, 10.196479797363281, 71.29655456542969, 20.517181396484375, 34.07734680175781, 43.21321105957031, 20.124465942382812, 26.794113159179688, 15.381584167480469, 30.14165496826172, 24.835121154785156, 14.950607299804688, 48.404083251953125, -5.611228942871094, 45.92387390136719, 33.08306884765625, 71.44279479980469, 3.968597412109375, -0.6487274169921875, 35.278480529785156, -30.515464782714844, 52.647369384765625, 10.642454147338867, 17.221200942993164, -10.676559448242188, -5.7055206298828125, 29.590049743652344, 58.244354248046875, 4.667757034301758, 40.78773498535156, 15.192756652832031, 14.717456817626953, 10.165245056152344, 39.46057891845703, 1.9578399658203125, 28.410362243652344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000237.npy"}
|
|
{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 24.64088249206543, "std": 20.25897979736328, "min": -12.141304016113281, "p10": 2.586801910400391, "median": 21.2393856048584, "p90": 49.528165435791024, "max": 82.74313354492188, "pos_frac": 0.953125, "sample": [6.5242919921875, 6.615875244140625, 5.243133544921875, 41.96393966674805, 12.954574584960938, 11.708038330078125, 25.36248779296875, 27.7947998046875, 12.372346878051758, 43.895111083984375, 17.709688186645508, 47.93565368652344, 17.021820068359375, 4.818294525146484, 31.422040939331055, 34.613433837890625, 36.604736328125, 2.3336563110351562, 25.975025177001953, 12.522941589355469, -3.991252899169922, 69.90716552734375, 18.016983032226562, 42.77014923095703, 0.4992656707763672, 82.74313354492188, 12.666763305664062, 24.292137145996094, 65.09771728515625, 1.167572021484375, 25.816314697265625, 10.88671875, 14.177154541015625, 30.42534637451172, 41.1195068359375, 7.686895370483398, 21.043712615966797, 27.368759155273438, 68.7672119140625, 29.635169982910156, 10.147510528564453, 16.354156494140625, 21.931991577148438, -12.141304016113281, 16.02307891845703, 3.91705322265625, 1.3464221954345703, 26.77386474609375, 32.56279754638672, 73.5896987915039, 30.90398406982422, 17.556114196777344, 21.43505859375, 46.05933380126953, -6.863838195800781, 65.49168395996094, 50.210670471191406, 23.9954776763916, 19.59625244140625, 3.1774749755859375, 35.47943115234375, 26.53548812866211, 20.914337158203125, 16.53139305114746], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000238.npy"}
|
|
{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 25.54654312133789, "std": 21.156612396240234, "min": -15.41408920288086, "p10": 3.7882785797119145, "median": 22.45859146118164, "p90": 55.316453552246095, "max": 80.93455505371094, "pos_frac": 0.9375, "sample": [69.46001434326172, 23.39801025390625, 17.539813995361328, 32.19415283203125, 15.06821060180664, 25.82177734375, 28.641952514648438, 24.51303482055664, 4.101757049560547, 4.284599304199219, 23.008220672607422, 19.98382568359375, 7.467266082763672, 31.469772338867188, 10.254568099975586, 3.038705825805664, 52.40208435058594, 17.164451599121094, 24.067184448242188, 52.630123138427734, 49.00010681152344, 14.064483642578125, 32.87495422363281, -15.41408920288086, 13.549064636230469, 13.183588027954102, 37.38715362548828, 59.32099914550781, 24.210914611816406, 22.5538330078125, 7.823732376098633, 17.25445556640625, 55.172332763671875, 76.83549499511719, 1.9246978759765625, 22.36334991455078, 15.309898376464844, 79.4695816040039, 13.576858520507812, 44.511627197265625, 29.808589935302734, 55.37821960449219, 15.418136596679688, 31.82769775390625, -3.52972412109375, 12.036903381347656, -9.6553955078125, 3.6539306640625, -9.424003601074219, 30.396137237548828, 46.439491271972656, 28.848007202148438, 30.490203857421875, 16.005115509033203, 80.93455505371094, 25.444541931152344, 62.80955505371094, 12.558414459228516, 17.487293243408203, 30.04884910583496, 19.205291748046875, 7.259243011474609, 17.961524963378906, 12.093671798706055], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000239.npy"}
|
|
{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 22.555252075195312, "std": 21.10706901550293, "min": -29.489578247070312, "p10": -2.7541986465454102, "median": 19.93775463104248, "p90": 53.512052917480474, "max": 64.00074768066406, "pos_frac": 0.84375, "sample": [19.018327713012695, 63.67582321166992, 18.01873016357422, 11.037796020507812, 25.881256103515625, 51.83033752441406, 3.904327392578125, 58.278961181640625, 4.225502014160156, -4.167816162109375, 21.32413101196289, 15.38494873046875, -2.535919189453125, 34.32818603515625, 49.17444610595703, 18.168458938598633, -2.7955970764160156, 13.383140563964844, 4.615196228027344, 48.842185974121094, 18.31085968017578, 63.86137390136719, 12.267570495605469, 38.98167419433594, 20.963340759277344, 13.286849975585938, -14.478706359863281, 6.027313232421875, 14.9608154296875, 64.00074768066406, 30.275482177734375, 36.10211181640625, 33.86994171142578, 24.174259185791016, -11.45660400390625, 44.64181137084961, 36.123992919921875, 20.99701690673828, -10.472373962402344, 24.405597686767578, 3.487041473388672, 41.84297561645508, -3.6214141845703125, 17.424983978271484, 41.686500549316406, -1.2905197143554688, 41.509132385253906, -2.657602310180664, 54.2327880859375, 30.560623168945312, 18.730144500732422, 59.09283447265625, 11.382293701171875, 22.598106384277344, 2.9239578247070312, 27.91008949279785, 27.929935455322266, 19.5554256439209, 62.57630920410156, 29.859298706054688, 9.751411437988281, 20.320083618164062, 18.78583526611328, -29.489578247070312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000240.npy"}
|
|
{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 31.69961929321289, "std": 31.25377655029297, "min": -24.563827514648438, "p10": -0.5832557678222654, "median": 27.302894592285156, "p90": 76.5036720275879, "max": 160.10690307617188, "pos_frac": 0.875, "sample": [21.63387680053711, 36.48139953613281, -7.040153503417969, 12.418645858764648, -0.35833740234375, 32.45329284667969, 53.536705017089844, 57.76475524902344, 39.315452575683594, 39.8052978515625, 48.74382400512695, 14.53961181640625, 2.879131317138672, 86.20700073242188, -5.582446098327637, 33.7515754699707, 8.428413391113281, 49.8741455078125, 52.34680938720703, 38.111854553222656, 26.79290771484375, 29.768089294433594, 11.306711196899414, 18.084625244140625, 160.10690307617188, 37.92579650878906, 21.64678955078125, 25.34239959716797, 80.63618469238281, -0.6796493530273438, 98.38102722167969, 27.051849365234375, 73.33650207519531, 18.02902603149414, 27.553939819335938, 4.815338134765625, 17.047584533691406, 33.37092590332031, 15.086906433105469, 30.685577392578125, 28.28771209716797, 19.478511810302734, 8.646368026733398, 12.642669677734375, 13.749885559082031, -5.548187255859375, 29.851829528808594, -3.2833824157714844, 39.36237335205078, 36.40995788574219, 17.806365966796875, 20.944625854492188, 16.15289306640625, 82.01509857177734, 48.674591064453125, 28.776649475097656, -22.428497314453125, 51.73851013183594, 66.56495666503906, 77.86103057861328, -24.563827514648438, 97.50470733642578, 9.268424987792969, 7.2621002197265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000241.npy"}
|
|
{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 24.644126892089844, "std": 21.845211029052734, "min": -19.23004150390625, "p10": 1.6598030090332034, "median": 19.669065475463867, "p90": 53.11238708496095, "max": 99.436279296875, "pos_frac": 0.9375, "sample": [-3.9956188201904297, 14.010135650634766, 9.536796569824219, 47.06852722167969, 19.614192962646484, -19.23004150390625, 12.618453979492188, 7.192771911621094, 50.717437744140625, 16.634124755859375, 5.105546951293945, 6.5867919921875, 74.5836181640625, 29.547422409057617, 23.12195587158203, 11.515266418457031, 16.17626190185547, 33.962425231933594, 62.90423583984375, 6.822969436645508, 33.91378402709961, 6.122184753417969, 35.08501434326172, 25.18645477294922, 40.09596252441406, 17.16021728515625, 42.265602111816406, 16.34326171875, 17.562299728393555, -10.775634765625, 21.407333374023438, 37.786338806152344, 19.72393798828125, -6.202392578125, 37.46678161621094, 19.181251525878906, 54.1387939453125, 99.436279296875, 16.15007781982422, 2.985870361328125, 1.8648757934570312, 24.268829345703125, 18.490982055664062, 71.11398315429688, 16.42278289794922, 4.64990234375, 49.360191345214844, 19.033729553222656, 35.38099670410156, 24.646974563598633, 0.425628662109375, 66.96372985839844, 15.220317840576172, 1.4953460693359375, 36.597625732421875, 1.5719146728515625, 28.77918243408203, 20.256839752197266, 28.924423217773438, 22.607986450195312, 7.813016891479492, 55.76123046875, 48.76792907714844, 27.279014587402344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000242.npy"}
|
|
{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 29.49078941345215, "std": 21.84927749633789, "min": -8.895929336547852, "p10": 4.926291847229005, "median": 25.51095962524414, "p90": 61.300480270385755, "max": 101.5250015258789, "pos_frac": 0.96875, "sample": [21.834228515625, 3.2497787475585938, 22.279109954833984, 55.84269714355469, 44.048988342285156, 37.434471130371094, 13.840755462646484, 37.264434814453125, -8.895929336547852, 6.0708160400390625, 3.789947509765625, 13.482757568359375, 66.67047119140625, 11.534208297729492, 14.505332946777344, 25.86804962158203, 25.15386962890625, 67.7083740234375, 9.068916320800781, 13.172683715820312, 1.691253662109375, 2.9955806732177734, 26.786174774169922, 28.07476234436035, 4.435781478881836, 29.543411254882812, 22.74528694152832, 17.104080200195312, 26.683334350585938, 30.74774169921875, 46.81635284423828, 52.61351776123047, 7.635950088500977, 51.86601257324219, 9.907913208007812, 85.46104431152344, 47.83876037597656, 42.80958557128906, 30.416610717773438, 43.77766418457031, 58.14552688598633, 23.009239196777344, 101.5250015258789, 72.4244155883789, 14.937171936035156, 36.03935241699219, 18.454330444335938, 12.840259552001953, 62.65260314941406, -1.1430530548095703, 29.99962043762207, 28.781822204589844, 32.094024658203125, 18.707275390625, 66.42369079589844, 17.441471099853516, 40.94938659667969, 16.255706787109375, 15.606880187988281, 36.420623779296875, 23.56036376953125, 29.78295135498047, 24.628149032592773, 13.998970031738281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000243.npy"}
|
|
{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 25.319198608398438, "std": 21.50766372680664, "min": -12.607946395874023, "p10": 0.47381610870361346, "median": 21.22218894958496, "p90": 58.764900970458996, "max": 75.14773559570312, "pos_frac": 0.921875, "sample": [60.35028076171875, 4.850563049316406, 10.97793197631836, 21.31737518310547, 9.436813354492188, 8.037002563476562, 60.46289825439453, 35.12607192993164, -4.94805908203125, 14.145530700683594, 26.487995147705078, 32.27848815917969, 7.192535400390625, 30.48766326904297, 17.22021484375, 32.93928909301758, 0.6754322052001953, 75.14773559570312, 13.952568054199219, 18.583173751831055, -7.5303802490234375, 27.882322311401367, 23.976852416992188, -12.607946395874023, 55.06568145751953, 7.720220565795898, 16.238677978515625, 40.63072204589844, 18.844314575195312, -1.6609573364257812, 45.45344543457031, 6.542585372924805, 43.77742004394531, 52.668052673339844, 64.40045166015625, 46.18095397949219, 46.715599060058594, 69.02058410644531, 3.7824325561523438, 49.006256103515625, 25.17865753173828, -6.264631271362305, 61.624420166015625, 0.088409423828125, 36.05519104003906, 0.3874092102050781, 28.90513801574707, 11.826444625854492, 7.827484130859375, 20.703662872314453, 21.127002716064453, 19.317298889160156, 1.9564838409423828, 25.632150650024414, 9.380142211914062, 31.761219024658203, 30.0599365234375, 65.12542724609375, 1.3599510192871094, 34.796173095703125, 12.389892578125, 46.445648193359375, 10.361717224121094, 53.5546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000244.npy"}
|
|
{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 26.90045928955078, "std": 27.2540225982666, "min": -20.270065307617188, "p10": 2.0435630798339854, "median": 19.41887092590332, "p90": 52.18039093017579, "max": 115.78179931640625, "pos_frac": 0.9375, "sample": [7.845802307128906, 50.24717712402344, 49.78566360473633, 41.59670639038086, 20.13528060913086, 17.283893585205078, 17.10883903503418, 0.804229736328125, 18.18536376953125, 18.70246124267578, 12.856170654296875, 20.385597229003906, 46.050682067871094, 25.760997772216797, 3.7159976959228516, 33.49034881591797, 42.80079650878906, 45.07405090332031, 11.059432983398438, 7.1432647705078125, 115.78179931640625, 20.742462158203125, -5.605720520019531, 47.45118713378906, 2.95361328125, 13.033401489257812, 23.45366668701172, -13.682758331298828, 5.741186141967773, 4.957405090332031, 46.27226257324219, 33.77058410644531, 101.12528991699219, 5.6642913818359375, 44.771766662597656, 12.892105102539062, 3.6472625732421875, 13.507741928100586, 33.974098205566406, 53.0089111328125, 4.159889221191406, 9.302619934082031, 43.6778564453125, 17.531646728515625, 34.01885223388672, -20.270065307617188, 47.048065185546875, 73.43207550048828, 23.814308166503906, 46.19651794433594, 71.96156311035156, 7.708732604980469, 85.76377868652344, 7.600778579711914, 0.8817176818847656, 98.78688049316406, 24.467844009399414, 40.62062072753906, 6.186367034912109, -1.2692031860351562, 29.26944351196289, 8.95892333984375, 6.633365631103516, 1.6535415649414062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000245.npy"}
|
|
{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 23.103233337402344, "std": 21.114534378051758, "min": -16.053936004638672, "p10": 1.90443458557129, "median": 17.323326110839844, "p90": 55.52792663574219, "max": 80.6675033569336, "pos_frac": 0.921875, "sample": [29.299819946289062, 67.36518096923828, 28.195159912109375, 11.754974365234375, 1.1007080078125, 34.63203048706055, 80.6675033569336, -3.368377685546875, 25.587139129638672, -5.354545593261719, 6.910499572753906, 24.533058166503906, 23.276992797851562, 6.592798233032227, 30.020692825317383, 38.419654846191406, 54.47785186767578, 4.291744232177734, 7.4782867431640625, 25.93682861328125, 21.299358367919922, 52.43042755126953, 32.41291427612305, 17.05541229248047, 33.468055725097656, 4.4635772705078125, 34.67985153198242, 9.987060546875, 62.567413330078125, -2.929201126098633, 3.378438949584961, 27.038253784179688, 16.308446884155273, 10.810073852539062, 55.97795867919922, 10.271415710449219, 14.060684204101562, 5.834621429443359, 17.27104949951172, 8.517555236816406, 58.257781982421875, 41.18647766113281, 8.531242370605469, 37.0917854309082, 13.630090713500977, 44.40119934082031, 22.97342300415039, 2.9098968505859375, 2.9803848266601562, 13.547794342041016, -16.053936004638672, 17.37560272216797, 56.35716247558594, 10.050949096679688, 15.669631958007812, 19.50726890563965, 1.4735221862792969, 78.26365661621094, 31.563720703125, 48.18144226074219, 15.238960266113281, 33.51525115966797, -7.7497406005859375, 2.982074737548828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000246.npy"}
|
|
{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 32.50457000732422, "std": 25.603313446044922, "min": -19.49005126953125, "p10": 5.508189582824707, "median": 30.655773162841797, "p90": 64.82264251708985, "max": 103.99567413330078, "pos_frac": 0.921875, "sample": [5.194570541381836, 6.492544174194336, 23.259719848632812, 64.96803283691406, 32.50465393066406, 73.7529296875, 64.4833984375, 36.12696838378906, 16.879899978637695, 41.80049514770508, 78.2312240600586, 52.779266357421875, 35.347259521484375, 61.54307556152344, 13.10883903503418, 66.35201263427734, 80.34585571289062, 12.292415618896484, 49.34739685058594, 103.99567413330078, 27.185691833496094, -4.456853866577148, 32.322967529296875, 9.336830139160156, 8.908056259155273, 38.0670166015625, 3.2025146484375, 18.308856964111328, 54.1087646484375, 23.0177001953125, 51.27685546875, 51.500701904296875, 33.353904724121094, 29.39977264404297, 24.450790405273438, 44.516998291015625, 40.398101806640625, 29.993133544921875, 33.28923797607422, -11.351852416992188, 8.611675262451172, 31.31841278076172, 6.239967346191406, 8.409137725830078, 13.633218765258789, 13.680805206298828, 17.42874526977539, 36.001686096191406, 57.615997314453125, 40.03363800048828, 8.288002014160156, 8.420173645019531, 20.484352111816406, 95.22830200195312, 28.609771728515625, -3.3704490661621094, 28.07392120361328, 43.352325439453125, 59.10719680786133, -19.49005126953125, 54.54170227050781, -4.965217590332031, 16.760269165039062, 56.6434326171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000247.npy"}
|
|
{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 24.356069564819336, "std": 19.23394203186035, "min": -5.182277679443359, "p10": 3.9992078781127933, "median": 17.58814525604248, "p90": 56.45011711120606, "max": 77.38822174072266, "pos_frac": 0.96875, "sample": [32.623470306396484, 48.373291015625, 8.649166107177734, 16.159259796142578, 36.41210174560547, 0.8697834014892578, 10.2210693359375, 37.43410110473633, 3.444244384765625, 14.321258544921875, 7.698127746582031, 66.31998443603516, 15.627212524414062, 18.912267684936523, 42.178165435791016, 4.129663467407227, 12.504117965698242, 4.967519760131836, 30.437986373901367, 22.63461685180664, 14.532760620117188, 14.777801513671875, 17.741657257080078, 59.705352783203125, 12.4254150390625, 23.18595314025879, 35.215087890625, 40.95664978027344, 38.892051696777344, 60.66162109375, 18.962158203125, 13.452316284179688, 16.279563903808594, 31.0841064453125, 20.855085372924805, 52.79646301269531, 0.8837127685546875, 9.531166076660156, 17.716564178466797, 13.616153717041016, 16.08721923828125, 54.741092681884766, 31.964820861816406, 77.38822174072266, 24.93427276611328, 17.459726333618164, 62.541465759277344, 49.79020690917969, 19.81389617919922, 15.701393127441406, 0.06910324096679688, 4.406049728393555, 58.33575439453125, 31.690277099609375, 57.18255615234375, 30.90558624267578, 6.710906982421875, 13.407615661621094, 13.346733093261719, 10.737510681152344, 3.94329833984375, 15.70833969116211, -5.182277679443359, -0.08432579040527344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000248.npy"}
|
|
{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 20.92279624938965, "std": 20.92414665222168, "min": -13.514167785644531, "p10": -3.8161849975585915, "median": 18.073017120361328, "p90": 51.985609436035155, "max": 74.16664123535156, "pos_frac": 0.828125, "sample": [3.4135875701904297, 37.286834716796875, 20.67205047607422, 14.483373641967773, 9.898826599121094, -4.803123474121094, 5.94207763671875, 16.36376953125, 25.755203247070312, 25.677093505859375, 24.471176147460938, 13.604522705078125, 7.225370407104492, 32.51994323730469, 4.4287109375, -13.514167785644531, 7.787784576416016, 51.484588623046875, 25.419883728027344, -1.5133285522460938, 19.0323486328125, 35.141605377197266, 5.025241851806641, 31.596282958984375, 1.3579864501953125, 45.148094177246094, 1.5603809356689453, 48.57450866699219, 52.20033264160156, 48.95615768432617, -5.461345672607422, 24.75326919555664, 17.113685607910156, 16.32898712158203, 13.654569625854492, 24.529518127441406, 26.675514221191406, -5.349220275878906, 30.113018035888672, 54.316978454589844, 40.54949951171875, -0.176727294921875, 74.16664123535156, -10.126754760742188, -5.4258575439453125, -0.22603225708007812, 30.58839225769043, 3.0184478759765625, 21.282142639160156, 9.206024169921875, 54.019126892089844, 35.44312286376953, 2.6375808715820312, 63.01483154296875, 50.652137756347656, 55.11894989013672, 6.1932525634765625, -13.3912353515625, 11.110458374023438, 20.1824951171875, -0.26505279541015625, 55.716217041015625, 41.08235168457031, 2.8168678283691406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000249.npy"}
|
|
{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 25.81413459777832, "std": 18.444185256958008, "min": -20.155029296875, "p10": 3.6458049774169923, "median": 26.776209831237793, "p90": 46.64247131347656, "max": 72.4102783203125, "pos_frac": 0.9375, "sample": [42.470436096191406, 26.379209518432617, 12.591377258300781, -13.445613861083984, -20.155029296875, 39.34693908691406, 6.256385803222656, 40.93983459472656, 24.361841201782227, 29.760330200195312, 22.637371063232422, 30.16351318359375, 17.063369750976562, 36.46281433105469, 10.488197326660156, 25.809715270996094, 11.996665954589844, 23.4354248046875, 45.93380355834961, 33.987823486328125, 32.784149169921875, -4.23822021484375, 3.8048019409179688, 45.091209411621094, 4.716636657714844, 39.874244689941406, 19.99022674560547, 10.360414505004883, 28.13178253173828, 34.30706787109375, 22.74357032775879, 3.5776634216308594, 5.213035583496094, 64.00879669189453, -5.511020660400391, 49.83563232421875, 44.590576171875, 24.021240234375, 2.4156417846679688, 24.02104949951172, 29.28668212890625, 51.55860900878906, 32.778900146484375, 33.89141845703125, 48.684051513671875, 21.95631980895996, 32.28173065185547, 30.4110107421875, 27.17321014404297, 17.539794921875, 46.94618606567383, 12.115184783935547, 15.264425277709961, 45.55564880371094, 6.760383605957031, 30.86779022216797, 72.4102783203125, 65.36186218261719, 33.68883514404297, 34.43141174316406, 22.432815551757812, 0.275604248046875, 5.797698974609375, 38.41188049316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000250.npy"}
|
|
{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 25.741519927978516, "std": 20.797607421875, "min": -5.296670913696289, "p10": -0.7325302124023417, "median": 24.485797882080078, "p90": 55.7824691772461, "max": 72.34446716308594, "pos_frac": 0.890625, "sample": [54.15087127685547, 4.221488952636719, 68.98190307617188, 40.17266845703125, 42.59912872314453, 5.8397369384765625, 10.365386962890625, 56.652923583984375, 45.360877990722656, 26.287141799926758, 56.387115478515625, 1.2410049438476562, 36.51303482055664, 6.912609100341797, 27.359119415283203, -3.844573974609375, -1.5783309936523438, 72.34446716308594, 17.54090690612793, 33.75366973876953, 11.180789947509766, 34.421051025390625, 10.476276397705078, 48.71381378173828, 8.265037536621094, 13.596939086914062, -2.4530277252197266, -5.296670913696289, 30.956512451171875, 67.21954345703125, 10.341934204101562, -2.5866775512695312, 44.09906005859375, 13.706043243408203, 2.9639530181884766, 39.32927703857422, 3.509960174560547, 14.755645751953125, 24.684120178222656, 44.614620208740234, 24.2874755859375, 33.4290885925293, 16.26186180114746, 48.15667724609375, 4.2627105712890625, 57.26023864746094, 19.51080322265625, 62.472373962402344, 23.19458770751953, 26.20807647705078, -2.1134281158447266, 10.193592071533203, 48.76600646972656, 36.93717575073242, 54.37162780761719, 24.72757339477539, 5.971576690673828, 43.508544921875, 20.203123092651367, -1.8891334533691406, 18.04259490966797, 26.2840576171875, 6.354427337646484, 27.296371459960938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000251.npy"}
|
|
{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 25.683515548706055, "std": 19.699735641479492, "min": -22.630157470703125, "p10": 2.879988098144531, "median": 23.110946655273438, "p90": 53.72540359497071, "max": 81.23387145996094, "pos_frac": 0.9375, "sample": [53.05427551269531, 37.78865051269531, 17.51276397705078, 55.763580322265625, 23.959991455078125, 6.175811767578125, 22.15534210205078, 29.688323974609375, 15.314628601074219, 53.98420715332031, 25.058151245117188, 2.9883270263671875, 41.20646667480469, 4.5748748779296875, 21.040489196777344, 41.329002380371094, 14.720165252685547, 20.592430114746094, 35.80188751220703, 19.155845642089844, 32.718223571777344, 54.49519348144531, 27.266300201416016, 15.559741973876953, 39.160308837890625, -0.8323707580566406, -4.022520065307617, 2.83355712890625, 12.400482177734375, 26.83991241455078, 3.5040664672851562, 28.673095703125, 53.12152862548828, 22.724143981933594, 33.67431640625, 6.9061737060546875, 36.73306655883789, 28.741230010986328, 22.635597229003906, 71.89602661132812, 7.747831344604492, 1.5472488403320312, 35.77861022949219, 21.397048950195312, 32.15138244628906, 81.23387145996094, 55.96278762817383, 6.065940856933594, 20.027462005615234, 44.27493667602539, 16.973663330078125, -22.630157470703125, 28.749496459960938, 38.167213439941406, 15.76849365234375, 20.720447540283203, 0.41178226470947266, 23.49774932861328, 72.72721862792969, 20.077632904052734, 32.491695404052734, -3.3478012084960938, 12.456794738769531, 24.600379943847656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000252.npy"}
|
|
{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 24.096176147460938, "std": 23.356128692626953, "min": -29.50146484375, "p10": -0.6898841857910151, "median": 21.87054443359375, "p90": 56.18104324340821, "max": 90.86932373046875, "pos_frac": 0.875, "sample": [12.749715805053711, 21.267295837402344, 35.99620056152344, 30.257118225097656, 5.911949157714844, 9.540092468261719, 56.826927185058594, 18.381858825683594, 53.22773742675781, 13.26148796081543, 23.767669677734375, 62.665618896484375, 47.96379089355469, 3.3196029663085938, 35.44352722167969, 21.04498291015625, 51.16481018066406, 12.803985595703125, 14.537332534790039, 25.604827880859375, -12.397611618041992, -29.50146484375, 31.00152587890625, 42.40321350097656, 43.186737060546875, 4.686676025390625, 90.86932373046875, -0.9186859130859375, 2.7916412353515625, 5.422374725341797, 4.918373107910156, 26.910097122192383, 27.420818328857422, 32.037811279296875, 54.673980712890625, 66.99911499023438, 8.443840026855469, -2.4990711212158203, 68.31767272949219, 22.473793029785156, 14.128837585449219, 36.31794738769531, 35.827476501464844, 58.15447998046875, 39.85235595703125, 7.483917236328125, 35.91259765625, 42.23469543457031, -0.15601348876953125, -5.070077896118164, 12.097038269042969, 22.501625061035156, -21.05364990234375, 3.932373046875, 1.8040313720703125, 14.57760238647461, 30.002960205078125, 33.95207214355469, 67.33567810058594, 8.826446533203125, -1.4004707336425781, 0.65155029296875, 44.308555603027344, 16.956512451171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000253.npy"}
|
|
{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 24.636829376220703, "std": 22.709455490112305, "min": -13.478515625, "p10": -0.7494081497192374, "median": 20.53754425048828, "p90": 51.56536941528321, "max": 87.48858642578125, "pos_frac": 0.890625, "sample": [31.6702880859375, 1.2179832458496094, 33.189735412597656, 12.825475692749023, 40.64435577392578, 35.60084915161133, 5.2562713623046875, -1.8152332305908203, 2.3504486083984375, 15.590213775634766, 16.713890075683594, 55.83097839355469, 51.295440673828125, 22.326566696166992, 0.5062465667724609, -11.431133270263672, 24.769882202148438, 14.799495697021484, 31.012725830078125, 39.48866271972656, 2.308483123779297, 50.14567565917969, 84.2740478515625, 51.681053161621094, 44.8028564453125, 1.8476409912109375, 36.2259521484375, 49.61610412597656, 0.11711502075195312, 12.312623977661133, 12.720605850219727, 50.71751403808594, 21.06438636779785, -13.478515625, 3.4732818603515625, 49.82978820800781, 10.795135498046875, 59.99577331542969, 16.14529800415039, 36.6250114440918, 7.7290802001953125, -12.144012451171875, 21.415922164916992, 57.73969268798828, -1.1207752227783203, 39.479270935058594, 2.7476139068603516, 34.591033935546875, 12.242300033569336, 19.85259246826172, 48.098419189453125, 20.455062866210938, 20.620025634765625, 15.885360717773438, -5.5195465087890625, 87.48858642578125, 18.47625732421875, 50.4166259765625, 21.74469757080078, -1.4968719482421875, 8.196609497070312, 63.719642639160156, 30.338096618652344, 12.738395690917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000254.npy"}
|
|
{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 27.34168243408203, "std": 28.56133460998535, "min": -56.669097900390625, "p10": 0.02091140747070408, "median": 21.643768310546875, "p90": 69.71080474853518, "max": 101.58807373046875, "pos_frac": 0.890625, "sample": [101.58807373046875, 21.220123291015625, 8.132858276367188, 11.530044555664062, 24.422821044921875, 24.470993041992188, 12.340787887573242, 5.7871246337890625, -56.669097900390625, 16.004741668701172, 42.47669219970703, 17.050437927246094, 8.20465087890625, 8.18979263305664, 45.11675262451172, 26.562694549560547, -15.344772338867188, -11.260406494140625, 22.067413330078125, 57.43782424926758, 51.86785888671875, 12.979616165161133, 26.736572265625, 11.177181243896484, 34.60902786254883, 3.114715576171875, -0.3937950134277344, 85.66970825195312, 8.690452575683594, 34.43025207519531, 16.408096313476562, 39.83635330200195, 27.86951446533203, 72.63481140136719, 79.31295776367188, 43.69972229003906, 56.2266960144043, 34.37335205078125, 45.89225769042969, 92.89071655273438, 3.2316741943359375, -8.875768661499023, 36.83711624145508, 33.866085052490234, 16.327686309814453, 23.698036193847656, -0.3812408447265625, 13.89706802368164, 4.422698974609375, 78.01765441894531, 83.0201416015625, 15.063352584838867, 0.9592666625976562, 9.060096740722656, 6.363578796386719, 18.9818115234375, 41.33794403076172, 44.82389831542969, 15.440208435058594, 32.93589782714844, -6.294639587402344, 14.924592971801758, 61.966835021972656, 62.88812255859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000255.npy"}
|
|
{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 27.345840454101562, "std": 25.15676498413086, "min": -33.968597412109375, "p10": 2.101638412475586, "median": 20.493896484375, "p90": 57.833874511718754, "max": 106.14176940917969, "pos_frac": 0.921875, "sample": [43.83059310913086, 106.14176940917969, 16.336532592773438, 41.00764465332031, 32.90582275390625, 56.27587890625, 13.654993057250977, 14.043136596679688, -8.476582527160645, 23.774391174316406, 17.67910385131836, 72.48108673095703, 44.260101318359375, 44.55914306640625, 43.805484771728516, -3.7725677490234375, 18.591068267822266, 38.15726852416992, -4.276924133300781, 6.162702560424805, 16.507919311523438, 19.016143798828125, 4.534053802490234, 38.634765625, 26.984729766845703, 40.223876953125, 17.388214111328125, 2.1117935180664062, 10.20857048034668, 21.85472869873047, 53.94915771484375, 2.9711990356445312, 17.57332992553711, 38.03654861450195, 0.47171783447265625, 42.355323791503906, 2.1177444458007812, 13.296478271484375, 28.053329467773438, 15.446762084960938, -33.968597412109375, 21.786468505859375, 6.4462127685546875, 19.090805053710938, 16.0484619140625, 2.0972862243652344, 14.258323669433594, 41.931365966796875, 10.17205810546875, 36.354164123535156, 10.255821228027344, 37.96385955810547, 23.330726623535156, 58.5015869140625, 60.50456237792969, 94.40005493164062, 19.201324462890625, 10.438911437988281, 79.53279113769531, 54.069427490234375, 31.92560577392578, 30.56088638305664, -2.027555465698242, 78.38211059570312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000256.npy"}
|
|
{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 30.103281021118164, "std": 26.88237190246582, "min": -12.153640747070312, "p10": -1.5420700073242186, "median": 29.766724586486816, "p90": 69.0323471069336, "max": 106.3148193359375, "pos_frac": 0.875, "sample": [55.23321533203125, 3.658039093017578, 33.46302032470703, 34.57613754272461, 69.8831558227539, 4.377128601074219, 51.94959259033203, 58.02781677246094, 36.743953704833984, 26.355178833007812, -7.808111190795898, 2.036346435546875, 14.055397033691406, 65.02925109863281, 35.74585723876953, 8.977394104003906, 38.47465515136719, 34.88279724121094, 3.1442604064941406, 81.18411254882812, 23.344627380371094, 78.39363098144531, 69.45832824707031, -9.045671463012695, 33.86501693725586, 33.249393463134766, 16.831165313720703, -5.9580230712890625, 3.6340866088867188, 106.3148193359375, 55.68340301513672, 33.55784606933594, 52.54613494873047, 78.69622802734375, 33.280494689941406, 4.07861328125, 30.41636085510254, 5.598611831665039, 40.681793212890625, -1.5532302856445312, 10.805221557617188, 55.61761474609375, -7.530124664306641, 58.257076263427734, 8.585289001464844, 29.117088317871094, 21.06316375732422, 20.663955688476562, 68.03839111328125, 52.657920837402344, 28.671783447265625, 33.98719024658203, 16.651512145996094, 36.66790008544922, 20.47283172607422, 81.31558227539062, 12.212738037109375, -2.565929412841797, 10.646873474121094, -12.153640747070312, -1.5160293579101562, 7.025672912597656, 9.308425903320312, 35.546630859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000257.npy"}
|
|
{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 26.426227569580078, "std": 27.432861328125, "min": -44.445587158203125, "p10": 0.47082748413085984, "median": 24.691619873046875, "p90": 62.84750442504883, "max": 97.5058364868164, "pos_frac": 0.90625, "sample": [47.51805114746094, 5.808498382568359, 4.057018280029297, 46.875244140625, 15.927230834960938, 35.142311096191406, 77.67838287353516, -18.676422119140625, 15.44268798828125, 4.401329040527344, -3.276639938354492, 0.28170013427734375, 34.09844207763672, 30.57537078857422, 25.915468215942383, 0.9121246337890625, 5.4476318359375, 8.553642272949219, 51.99922180175781, 12.696495056152344, 4.4294891357421875, 71.35486602783203, 71.81521606445312, 15.483078002929688, 55.963653564453125, 40.17292785644531, 42.52911376953125, 12.009603500366211, 14.170692443847656, 64.9633560180664, 35.62012481689453, 29.964324951171875, 54.6806640625, 5.0657806396484375, -10.782833099365234, 18.433670043945312, 75.72930908203125, -44.445587158203125, 14.221244812011719, 15.044706344604492, 61.106834411621094, 5.2836151123046875, 5.516387939453125, 45.684356689453125, 12.567825317382812, -39.895538330078125, 97.5058364868164, 45.676414489746094, -11.50328254699707, 10.694869995117188, 24.881439208984375, 4.356803894042969, 39.328948974609375, 58.27802276611328, 63.593505859375, 48.32330322265625, 39.44007110595703, 36.77105712890625, 43.00458526611328, 31.707828521728516, 29.766494750976562, 9.630184173583984, 24.501800537109375, 17.256057739257812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000258.npy"}
|
|
{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 23.946025848388672, "std": 21.142629623413086, "min": -27.04534149169922, "p10": 0.5028085708618171, "median": 19.7800350189209, "p90": 48.131640625, "max": 76.01190185546875, "pos_frac": 0.90625, "sample": [44.65584182739258, -6.615211486816406, 37.97248077392578, 47.66747283935547, 14.546180725097656, -27.04534149169922, 48.23768615722656, 47.88420104980469, 45.83465576171875, 31.22763442993164, 6.406124114990234, 15.08935546875, 30.831764221191406, 13.796279907226562, 13.824554443359375, 10.734184265136719, 23.64899444580078, 5.575927734375, 69.71891784667969, 41.98711395263672, 36.615943908691406, 6.3838348388671875, 6.42242431640625, 24.369171142578125, 34.78126525878906, 4.550693511962891, 18.392635345458984, 13.23086166381836, 76.01190185546875, 1.1513214111328125, 13.426055908203125, 8.674427032470703, 72.895263671875, 43.826805114746094, -0.6759872436523438, 20.4097900390625, 35.843177795410156, 60.61768341064453, -1.3183517456054688, 60.19525909423828, 13.034919738769531, 3.3223419189453125, 41.69349670410156, 24.821590423583984, 20.615074157714844, 5.604545593261719, 41.079864501953125, 37.183929443359375, -3.5710906982421875, 23.23058319091797, -0.23975563049316406, 36.565940856933594, 60.47661590576172, 10.664291381835938, 33.20196533203125, 0.22487449645996094, 9.232118606567383, 39.82518768310547, 19.150279998779297, 5.288787841796875, 15.080394744873047, 5.924427032470703, 24.93030548095703, 13.421897888183594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000259.npy"}
|
|
{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 27.156875610351562, "std": 25.070676803588867, "min": -13.670892715454102, "p10": 1.2839290618896493, "median": 24.836669921875, "p90": 59.13706855773926, "max": 100.586181640625, "pos_frac": 0.90625, "sample": [24.796180725097656, -11.734926223754883, -0.9280929565429688, 2.403654098510742, 47.25763702392578, 25.467487335205078, 43.75559997558594, 48.61700439453125, 28.045654296875, 10.379310607910156, 13.16855239868164, 46.220767974853516, 13.52780532836914, 21.194580078125, 30.400291442871094, 100.586181640625, 27.453588485717773, 17.20415496826172, 7.0007171630859375, 60.146881103515625, -13.670892715454102, 12.583724975585938, 16.79184913635254, 58.9539909362793, -3.7187957763671875, 7.294303894042969, 40.41019821166992, 4.876983642578125, 32.63496017456055, 41.36410903930664, -0.20801544189453125, 56.50685119628906, 38.40277862548828, 44.56053161621094, 35.95415115356445, 52.55778884887695, 95.45592498779297, 24.91718292236328, 0.9141120910644531, 17.933013916015625, -11.455745697021484, 27.861711502075195, 37.47882843017578, 4.8930206298828125, 35.80586242675781, 2.2604808807373047, 9.566421508789062, 12.829124450683594, 37.41920471191406, 33.96879577636719, 31.12860107421875, 9.437088012695312, 2.1468353271484375, 85.46257019042969, 24.877159118652344, 13.19818115234375, 3.77471923828125, 23.294418334960938, 5.693532943725586, 76.4311752319336, 14.784408569335938, 68.54438018798828, 9.945987701416016, 59.21553039550781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000260.npy"}
|
|
{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 28.690322875976562, "std": 26.597740173339844, "min": -11.97327995300293, "p10": -2.721918106079101, "median": 23.389793395996094, "p90": 64.23923873901367, "max": 137.51593017578125, "pos_frac": 0.875, "sample": [77.14154052734375, 33.816551208496094, 15.235408782958984, -2.3847808837890625, 16.942913055419922, 137.51593017578125, 12.532979965209961, 70.07997131347656, 5.551750183105469, 25.68383026123047, 22.532312393188477, 10.508220672607422, 1.633209228515625, 68.57516479492188, 8.486564636230469, 29.69776153564453, 14.893062591552734, 32.759857177734375, 23.275375366210938, -6.745927810668945, 15.401092529296875, 26.646438598632812, 17.47600746154785, 18.209861755371094, 38.57355499267578, 60.89281463623047, 46.22935485839844, 20.593826293945312, 64.5863265991211, 16.357994079589844, -6.107898712158203, 23.50421142578125, 5.282907485961914, 36.68960952758789, 25.875274658203125, -4.452795028686523, 72.62271118164062, 31.15247344970703, 49.53208923339844, 19.998367309570312, 79.66571044921875, 32.11263656616211, 27.098373413085938, 63.42936706542969, 16.754484176635742, 46.20893859863281, -6.160663604736328, 44.56559371948242, 8.487838745117188, 15.139122009277344, 50.03070068359375, 51.64994812011719, 7.963081359863281, -2.866405487060547, -11.97327995300293, 34.07638931274414, 46.61088562011719, -4.918973922729492, 32.46720886230469, 35.248748779296875, 17.518577575683594, 14.632827758789062, 59.44978713989258, 2.1937637329101562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000261.npy"}
|
|
{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 25.835268020629883, "std": 21.952381134033203, "min": -29.028244018554688, "p10": 0.23545761108398588, "median": 25.458702087402344, "p90": 49.429504013061525, "max": 91.57423400878906, "pos_frac": 0.890625, "sample": [25.939189910888672, 34.408843994140625, 21.642974853515625, 31.098649978637695, -6.4638519287109375, 35.90205383300781, 76.97607421875, 45.25791931152344, 7.3737335205078125, 21.234375, -10.239828109741211, -29.028244018554688, 8.69003677368164, 49.76109313964844, 11.160858154296875, -18.690895080566406, 35.1006965637207, 8.705192565917969, -6.125579833984375, 28.9678955078125, 27.950138092041016, 20.38301658630371, -0.39342498779296875, 34.44012451171875, 33.50856018066406, 16.697677612304688, 4.597833633422852, 41.89073181152344, 52.62711715698242, 44.58113098144531, 6.690349578857422, 12.201942443847656, 21.455718994140625, -6.2266387939453125, 37.04387664794922, 27.930946350097656, 25.875198364257812, 24.459640502929688, 23.99547576904297, 48.65579605102539, 62.386077880859375, 36.288780212402344, 33.607994079589844, 80.808837890625, 18.484590530395508, 53.078041076660156, 29.866287231445312, 8.443016052246094, 18.906936645507812, 16.804956436157227, 24.801925659179688, 29.990936279296875, 14.853225708007812, 91.57423400878906, 6.777435302734375, 1.702850341796875, 43.863441467285156, 48.105682373046875, 31.347774505615234, 43.856849670410156, 17.427980422973633, 19.525527954101562, 25.798324584960938, 25.11907958984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000262.npy"}
|
|
{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 30.601646423339844, "std": 26.217376708984375, "min": -13.791454315185547, "p10": 3.6238365173339853, "median": 26.123005867004395, "p90": 67.71236267089844, "max": 115.3145751953125, "pos_frac": 0.9375, "sample": [64.70477294921875, 1.7556915283203125, 23.03497314453125, 21.5521240234375, 30.057098388671875, 26.60704803466797, 39.920265197753906, 20.897323608398438, 3.203826904296875, 34.09933090209961, -7.23419189453125, 54.92939758300781, 14.526762008666992, 53.99580383300781, 31.266220092773438, 13.209259033203125, 8.595653533935547, 48.72242736816406, 5.926216125488281, 13.82513427734375, -6.752740859985352, 4.879219055175781, 22.911407470703125, 73.51509094238281, -7.580423355102539, 15.175849914550781, 51.10760498046875, 25.63896369934082, 4.603858947753906, 16.306903839111328, 7.238273620605469, 4.634899139404297, 94.6734390258789, 6.849985122680664, 14.51846694946289, 69.41252136230469, 33.94602966308594, 15.945846557617188, 64.89310455322266, 13.16802978515625, 8.774890899658203, 34.050575256347656, 43.523895263671875, 37.261131286621094, 80.4212417602539, 41.89454650878906, 68.92061614990234, 15.130172729492188, 38.283111572265625, 42.722755432128906, 30.51923370361328, 55.76781463623047, 75.27815246582031, -13.791454315185547, 5.048957824707031, 47.23527526855469, 24.515213012695312, 10.825820922851562, 40.83152770996094, 39.68378448486328, 45.494903564453125, 115.3145751953125, 40.731117248535156, 1.3861236572265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000263.npy"}
|
|
{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 24.097835540771484, "std": 22.14469337463379, "min": -27.62053680419922, "p10": 0.9869392395019538, "median": 20.273197174072266, "p90": 62.068843078613284, "max": 72.20577239990234, "pos_frac": 0.90625, "sample": [39.58421325683594, 11.428398132324219, 10.91085433959961, 47.82855224609375, 30.957637786865234, -7.639137268066406, 4.522895812988281, 69.81009674072266, 13.331165313720703, 62.58177185058594, 7.167724609375, 12.098592758178711, 8.611848831176758, -1.9552078247070312, 31.36490249633789, 21.09064483642578, 70.97991943359375, 12.603927612304688, 27.32697296142578, 67.99324035644531, 19.74554443359375, 20.376220703125, 5.455535888671875, 23.53805923461914, 14.788154602050781, 36.557525634765625, 18.564064025878906, 24.303077697753906, 11.481147766113281, 25.499465942382812, 8.690017700195312, -7.295379638671875, 31.306997299194336, 47.24652099609375, 1.767608642578125, 58.186439514160156, 20.17017364501953, 5.750434875488281, 35.01184844970703, 49.47162628173828, 24.252086639404297, 18.06353759765625, 29.024383544921875, -0.5931472778320312, 9.211555480957031, 68.91850280761719, 35.98802185058594, -27.62053680419922, 11.151535034179688, 13.546592712402344, 35.74379348754883, 20.81863021850586, 19.74994659423828, 63.18309020996094, 4.815540313720703, 27.929126739501953, 35.637176513671875, 4.0746612548828125, -6.149578094482422, 60.87200927734375, 72.20577239990234, 22.851200103759766, 6.7210693359375, 0.6523666381835938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000264.npy"}
|
|
{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 25.292123794555664, "std": 25.837305068969727, "min": -18.707897186279297, "p10": -0.47680587768554616, "median": 20.27764129638672, "p90": 66.78379592895507, "max": 97.77685546875, "pos_frac": 0.890625, "sample": [1.9214935302734375, 97.77685546875, 72.16172790527344, 40.76377868652344, 23.990428924560547, -8.691644668579102, 21.390853881835938, 0.6749496459960938, 88.2987060546875, 29.219696044921875, 66.87370300292969, 2.4890518188476562, 5.961994171142578, 11.69247817993164, 0.2314453125, 26.11849594116211, 27.77947998046875, -18.707897186279297, 39.56800079345703, 25.701507568359375, 45.273406982421875, 77.12577819824219, -4.696624755859375, 83.42984008789062, -11.661941528320312, 52.616363525390625, 16.70355224609375, 66.57401275634766, 16.93399429321289, 80.26641082763672, 8.07796859741211, 13.847824096679688, 16.667943954467773, 22.226734161376953, 6.0147247314453125, 2.7953262329101562, 3.5895423889160156, 7.8822021484375, 46.20282745361328, 1.5479888916015625, 29.38011932373047, 13.608535766601562, 17.515066146850586, 32.78712463378906, 13.56637954711914, 8.889228820800781, -7.177986145019531, 31.797773361206055, 0.7993927001953125, 16.333724975585938, 19.1644287109375, -0.7803421020507812, 51.836181640625, 44.59911346435547, 31.16950225830078, 31.53774070739746, 37.97284698486328, 11.496711730957031, 25.082279205322266, -1.2561378479003906, 28.108224868774414, 14.251827239990234, 24.973182678222656, 36.40805435180664], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000265.npy"}
|
|
{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 24.757902145385742, "std": 22.67148208618164, "min": -19.329666137695312, "p10": -1.4016380310058585, "median": 19.418917655944824, "p90": 57.422105407714845, "max": 71.33233642578125, "pos_frac": 0.859375, "sample": [17.18353271484375, 62.12779235839844, 19.562150955200195, 19.275684356689453, 38.773582458496094, 26.06238555908203, 1.9921722412109375, 54.5716552734375, 28.30291748046875, 70.54505920410156, 70.47845458984375, 57.20172119140625, 5.693046569824219, 46.53734588623047, -2.083251953125, 13.535621643066406, 39.541690826416016, 26.955535888671875, 39.07155990600586, 10.724746704101562, 5.523380279541016, -6.539569854736328, -19.329666137695312, 70.2994613647461, 6.7488555908203125, 46.73956298828125, -0.5797500610351562, 11.65985107421875, 0.8225364685058594, 39.655067443847656, 71.33233642578125, 29.51416778564453, 58.151123046875, 56.41935729980469, 45.29142761230469, 16.259613037109375, 35.95212936401367, 44.3775634765625, 14.755882263183594, 16.78504180908203, 25.222267150878906, 13.1553955078125, 22.1339054107666, 24.9276180267334, 40.21364974975586, 7.219600677490234, 1.7377395629882812, -1.753875732421875, 57.51655578613281, 49.035621643066406, -7.90869140625, 3.1707839965820312, 17.479217529296875, 10.557235717773438, -0.14726829528808594, 17.264968872070312, 15.91859245300293, 3.2954330444335938, -9.537788391113281, 35.13701629638672, 35.41181945800781, 26.240814208984375, -3.693826675415039, 12.0172119140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000266.npy"}
|
|
{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 27.410789489746094, "std": 22.108509063720703, "min": -7.691558837890625, "p10": 2.883445739746094, "median": 24.24208641052246, "p90": 54.281617736816415, "max": 103.44767761230469, "pos_frac": 0.921875, "sample": [34.284461975097656, 16.068519592285156, 27.056602478027344, 1.2364845275878906, 34.09747314453125, 5.147344589233398, 48.408721923828125, 35.132415771484375, 20.22138214111328, 17.934452056884766, 5.8787689208984375, -7.691558837890625, 3.371307373046875, 47.94303894042969, 15.225517272949219, 17.786521911621094, 13.290725708007812, 61.92235565185547, 51.772003173828125, 103.44767761230469, 17.27820587158203, 23.61646270751953, 38.98352813720703, -5.6884765625, 2.6743621826171875, 30.968923568725586, 36.31280517578125, 11.704339981079102, 62.68470764160156, 65.357421875, 27.72125244140625, 52.34503936767578, 64.10205078125, 33.793914794921875, 11.64048957824707, 16.599945068359375, -6.4454345703125, 15.284141540527344, 6.248119354248047, 43.564659118652344, 55.11157989501953, 6.768333435058594, 9.155706405639648, 85.35572814941406, -4.4759521484375, 22.6251220703125, 17.96905517578125, 36.862449645996094, 24.826324462890625, 32.7554931640625, 7.253192901611328, 30.895782470703125, -2.5548934936523438, 37.597312927246094, 14.373039245605469, 14.4100341796875, 25.135643005371094, 49.71121597290039, 17.177396774291992, 31.113937377929688, 46.661460876464844, 23.657848358154297, 44.02558898925781, 26.59837532043457], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000267.npy"}
|
|
{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 25.404464721679688, "std": 21.823596954345703, "min": -14.694854736328125, "p10": -0.30285148620605423, "median": 22.961944580078125, "p90": 53.16849060058595, "max": 98.21807861328125, "pos_frac": 0.890625, "sample": [17.401519775390625, 70.59181213378906, 27.742935180664062, 21.499839782714844, 37.033531188964844, 12.394126892089844, 39.72438049316406, -1.2128639221191406, 42.31010437011719, 32.47810363769531, 59.87532043457031, -12.353034973144531, 39.40596008300781, 18.68146514892578, 74.05056762695312, -5.960357666015625, 4.222007751464844, 16.28956413269043, -14.694854736328125, 50.902740478515625, 20.116676330566406, 41.62535095214844, 2.2751426696777344, 37.74357604980469, 19.47549057006836, 50.333099365234375, 26.6492919921875, 98.21807861328125, -0.4978485107421875, 38.720245361328125, -0.7190361022949219, 5.9841156005859375, 14.95229721069336, 9.237388610839844, 58.126434326171875, 16.773948669433594, 27.7004337310791, 35.546363830566406, 23.207366943359375, 46.12998962402344, 23.37092399597168, 42.68060302734375, 17.30614471435547, 0.15214157104492188, 40.41558837890625, 17.364654541015625, 28.200286865234375, 36.62193298339844, 8.905441284179688, 24.217613220214844, 15.069129943847656, 22.716522216796875, 57.22441101074219, 26.174707412719727, 9.776311874389648, 13.907623291015625, 40.11912536621094, 6.655418395996094, 54.1395263671875, 23.743513107299805, 2.8272705078125, -9.395263671875, 2.086498260498047, 19.624267578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000268.npy"}
|
|
{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 32.81420135498047, "std": 23.66851234436035, "min": -9.059440612792969, "p10": -1.9296653747558592, "median": 34.81541061401367, "p90": 63.82899703979493, "max": 81.58445739746094, "pos_frac": 0.875, "sample": [32.460693359375, 72.91352844238281, 29.893020629882812, 14.903450012207031, 12.577384948730469, 32.28845977783203, 53.20514678955078, 26.081249237060547, 35.113624572753906, 0.45900726318359375, 40.40126419067383, -2.0426025390625, 59.04075622558594, 40.406402587890625, 35.10608673095703, 45.215576171875, 40.877227783203125, 14.565628051757812, 19.368911743164062, -6.355400085449219, 45.046875, 29.01424789428711, 40.65345001220703, -8.331737518310547, 16.00674819946289, 35.72052001953125, 34.52473449707031, 48.01289367675781, 81.58445739746094, 54.01800537109375, 38.82213592529297, 19.089752197265625, 67.23635864257812, 51.99740982055664, -4.85577392578125, 53.02876281738281, 62.52947998046875, 41.15204620361328, 53.746063232421875, 31.31585693359375, 1.3419837951660156, 35.47694396972656, 7.579254150390625, -5.2621917724609375, 24.128803253173828, 64.45791625976562, 48.59710693359375, 52.142791748046875, 61.165191650390625, 8.705413818359375, 51.198204040527344, 11.646102905273438, 24.546615600585938, 64.38593292236328, 6.419164657592773, 81.3481216430664, 25.53264617919922, -9.059440612792969, -1.6661453247070312, 18.711692810058594, 51.28407287597656, 68.6515884399414, -6.161827087402344, 28.147232055664062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000269.npy"}
|
|
{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 25.06671142578125, "std": 22.241985321044922, "min": -13.552497863769531, "p10": 1.032383155822754, "median": 21.128607749938965, "p90": 57.74253692626953, "max": 83.53317260742188, "pos_frac": 0.90625, "sample": [25.29145050048828, 49.98212432861328, 60.32280731201172, -9.736665725708008, -8.443367004394531, 20.713043212890625, 25.510433197021484, 21.63562774658203, 33.43693542480469, 59.436126708984375, 57.327125549316406, 10.141830444335938, 17.42125701904297, 17.762908935546875, 13.736289978027344, 6.158958435058594, 22.14244842529297, 16.70757293701172, 13.886787414550781, 29.886810302734375, 58.476959228515625, 18.177967071533203, 17.82117462158203, 48.79023742675781, 52.0050048828125, 32.20674133300781, 1.192556381225586, 9.772438049316406, 28.288604736328125, 23.58264923095703, -13.552497863769531, 54.879478454589844, 79.95649719238281, 0.9637374877929688, 8.780586242675781, 35.373321533203125, 36.19830322265625, 8.192087173461914, 22.742691040039062, 41.627376556396484, 83.53317260742188, 6.9471588134765625, 23.4072265625, -7.655735015869141, 13.300666809082031, 21.544172286987305, 5.683372497558594, 50.193878173828125, 8.769317626953125, 15.109066009521484, 29.548782348632812, 8.627296447753906, 7.053466796875, -3.112092971801758, 14.095001220703125, 24.92767333984375, 6.543373107910156, 19.25641632080078, -4.127040863037109, 57.920570373535156, 43.700233459472656, 70.39450073242188, 6.908557891845703, 52.904083251953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000270.npy"}
|
|
{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 25.931888580322266, "std": 23.311038970947266, "min": -20.83026885986328, "p10": 0.8072496414184573, "median": 22.65183448791504, "p90": 63.80620422363282, "max": 79.33973693847656, "pos_frac": 0.9375, "sample": [72.15875244140625, 34.0384521484375, 34.214820861816406, 8.855491638183594, 31.994400024414062, 23.710674285888672, 41.16450500488281, 53.534027099609375, 30.35863494873047, -1.4113235473632812, 28.539749145507812, -19.013473510742188, 64.443603515625, 21.862838745117188, 8.179893493652344, 4.1139373779296875, 32.59498596191406, 22.888931274414062, 10.700187683105469, 2.2251815795898438, 1.090047836303711, 11.240280151367188, 55.536285400390625, 51.60809326171875, 65.04513549804688, 59.52386474609375, 26.053741455078125, 11.416147232055664, 21.562353134155273, 35.307960510253906, -6.151464462280273, 0.6860504150390625, 16.930713653564453, 7.7012176513671875, 3.685821533203125, 36.70769500732422, 0.34384918212890625, 0.5927753448486328, 7.4311981201171875, 11.54275894165039, 20.188880920410156, 79.33973693847656, 22.414737701416016, 3.446268081665039, 34.59203338623047, 11.684429168701172, 27.005889892578125, 25.713623046875, 66.71514892578125, 36.21516418457031, 71.32414245605469, 21.197715759277344, 10.666740417480469, 1.4350128173828125, 62.318939208984375, -20.83026885986328, 41.715087890625, 72.1309585571289, 30.630271911621094, 45.94264602661133, 5.167152404785156, 34.44013977050781, 15.465309143066406, 17.712379455566406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000271.npy"}
|
|
{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 31.53685760498047, "std": 27.45886993408203, "min": -29.17863655090332, "p10": -3.6873443603515614, "median": 31.681678771972656, "p90": 67.66065368652345, "max": 112.99320983886719, "pos_frac": 0.859375, "sample": [31.70777130126953, 4.961082458496094, 19.824926376342773, -8.124610900878906, 9.972206115722656, 65.50131225585938, 9.1409912109375, 62.99524688720703, 73.58348083496094, 34.58984375, 61.622474670410156, 60.28224182128906, -2.4966888427734375, 22.02768325805664, -29.17863655090332, 17.256074905395508, 9.486648559570312, 35.090240478515625, 17.851531982421875, 36.18865966796875, -6.8644256591796875, 52.60939025878906, 40.93701171875, 42.26239013671875, 21.189834594726562, 15.555923461914062, 6.045011520385742, 81.1505126953125, 30.429466247558594, 112.99320983886719, 31.1412353515625, 35.88232421875, 17.56741714477539, 37.692962646484375, 11.349634170532227, 40.499839782714844, 67.8832778930664, -6.183006286621094, 36.69297790527344, 31.65558624267578, 70.87173461914062, 19.106597900390625, 47.14544677734375, 30.157094955444336, 47.103851318359375, 4.978767395019531, 60.13812255859375, 11.986907958984375, 71.50845336914062, 56.048667907714844, -15.130363464355469, 33.779109954833984, -4.1627044677734375, -6.486228942871094, -2.5781707763671875, 65.07135009765625, 31.2357234954834, 41.22489547729492, 67.14119720458984, 5.023153305053711, 34.983802795410156, 9.211284637451172, 70.8763427734375, 36.350921630859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000272.npy"}
|
|
{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 30.293479919433594, "std": 25.976314544677734, "min": -21.275039672851562, "p10": 3.9777818679809576, "median": 27.245826721191406, "p90": 66.43413619995118, "max": 104.26617431640625, "pos_frac": 0.9375, "sample": [54.60099792480469, 46.90477752685547, 4.7202911376953125, 34.38129425048828, 71.61015319824219, 104.26617431640625, 10.255950927734375, 20.53651237487793, 74.49371337890625, -1.1814117431640625, 44.94261932373047, 22.505722045898438, 40.31629180908203, 17.359827041625977, -7.96088981628418, 27.894485473632812, 3.693052291870117, 6.230022430419922, 31.252864837646484, 49.684410095214844, 44.28277587890625, 12.069803237915039, 12.9334716796875, 24.41863441467285, 5.803224563598633, 24.0953369140625, 43.135986328125, 9.22867202758789, 18.491989135742188, 35.83177947998047, 21.171417236328125, 8.968132019042969, 62.202674865722656, 72.41925048828125, 10.778142929077148, 76.97055053710938, 59.209625244140625, 31.592926025390625, 4.64215087890625, 50.36456298828125, -13.782646179199219, 27.47735595703125, 30.521682739257812, 6.264036178588867, 48.624603271484375, 45.46397399902344, 54.04327392578125, 15.42103385925293, 8.268486022949219, -21.275039672851562, 95.13349914550781, 5.812938690185547, 28.988182067871094, 27.014297485351562, 39.56731414794922, 68.24761962890625, 11.941299438476562, 3.421894073486328, 0.351776123046875, 12.733123779296875, 51.5084228515625, 8.970321655273438, 47.048736572265625, 51.898529052734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000273.npy"}
|
|
{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 26.310405731201172, "std": 23.380788803100586, "min": -27.78002166748047, "p10": -0.22514152526855435, "median": 23.027732849121094, "p90": 62.186992645263686, "max": 85.86000061035156, "pos_frac": 0.890625, "sample": [24.165502548217773, 4.428655624389648, 9.484870910644531, -1.256134033203125, 41.164154052734375, 25.70305633544922, 26.732948303222656, -0.362030029296875, 6.810455322265625, 14.35443115234375, 19.86908531188965, 57.92694854736328, 65.65574645996094, 39.56062316894531, 28.63452911376953, 24.262237548828125, 34.66749572753906, 55.47050476074219, -9.763893127441406, 31.95049285888672, 32.56658172607422, 29.835987091064453, 17.2908935546875, 37.11516189575195, -27.78002166748047, 54.42103576660156, 70.4631118774414, 4.117706298828125, 9.881729125976562, 85.86000061035156, 0.09426498413085938, 22.25469970703125, 14.71529769897461, 13.062477111816406, 18.33378028869629, 43.33271789550781, 76.5072021484375, 19.279647827148438, 54.20478820800781, -1.564544677734375, 72.06402587890625, 12.367317199707031, 33.59979248046875, 18.552570343017578, 48.898704528808594, 1.4120635986328125, 26.184860229492188, 20.349302291870117, 18.306720733642578, 69.81637573242188, 33.35649108886719, 3.9633026123046875, 1.0025749206542969, 22.339988708496094, 0.9286117553710938, 35.949737548828125, -0.9976119995117188, -3.6458377838134766, 23.715476989746094, 26.024124145507812, 16.297866821289062, 49.318206787109375, 16.596481323242188, 64.01272583007812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000274.npy"}
|
|
{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 27.70269775390625, "std": 24.43610954284668, "min": -12.729217529296875, "p10": 0.024624252319336737, "median": 24.688678741455078, "p90": 59.178755187988294, "max": 101.51547241210938, "pos_frac": 0.890625, "sample": [46.93296813964844, 19.841800689697266, 26.452510833740234, 9.012855529785156, 7.557180404663086, 11.201332092285156, 61.39178466796875, 14.333137512207031, 82.439208984375, 12.870922088623047, 31.855361938476562, 49.428165435791016, 76.53228759765625, 33.145965576171875, 14.319602966308594, 42.917266845703125, -0.3014030456542969, 14.356338500976562, 5.8685455322265625, 32.44673156738281, 7.703041076660156, 39.581764221191406, 101.51547241210938, 5.200706481933594, 21.02581787109375, 5.454261779785156, 26.16167449951172, 41.37837219238281, 55.784637451171875, 24.356788635253906, -2.924112319946289, 7.8000946044921875, 40.73353576660156, 28.654434204101562, 31.24763298034668, 8.707229614257812, -3.0793380737304688, 16.178817749023438, -0.4181556701660156, 10.558250427246094, 25.322402954101562, 18.96405029296875, -2.7078933715820312, 4.976314544677734, 43.35947799682617, 45.11488342285156, 44.78546142578125, 84.48091125488281, 47.021995544433594, 79.42434692382812, 49.233970642089844, -12.614561080932617, 44.47108459472656, 60.63337707519531, 0.7853546142578125, 31.304595947265625, 21.51667022705078, 39.0224494934082, 3.5300064086914062, 21.92562484741211, 18.13029670715332, 33.77692413330078, -12.729217529296875, 25.02056884765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000275.npy"}
|
|
{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 25.469282150268555, "std": 24.726295471191406, "min": -15.752334594726562, "p10": -2.4194774627685547, "median": 22.903494834899902, "p90": 60.613398742675784, "max": 88.23272705078125, "pos_frac": 0.8125, "sample": [25.243629455566406, 88.23272705078125, 30.29239273071289, 50.563751220703125, 67.23814392089844, 30.986663818359375, -2.4665298461914062, 65.28619384765625, 4.04823112487793, 38.435970306396484, 7.08256721496582, 17.854835510253906, -2.1160202026367188, 22.886188507080078, 32.044776916503906, 35.2022705078125, -9.827133178710938, -1.7466049194335938, 7.9458465576171875, 11.359180450439453, 5.579643249511719, 17.783370971679688, 7.627769470214844, 46.320526123046875, 14.126083374023438, -2.3096885681152344, 3.3241615295410156, 34.28201675415039, 22.920801162719727, 37.37627410888672, -8.292961120605469, 19.739253997802734, 30.037208557128906, 35.59776306152344, 52.58741760253906, 16.928138732910156, 12.041580200195312, -3.847686767578125, -1.2893199920654297, 17.335254669189453, 28.291101455688477, 10.026351928710938, 0.9770164489746094, 61.26026916503906, 48.95280456542969, -2.9015769958496094, 43.891845703125, 32.35105895996094, 49.45192337036133, 27.231807708740234, 62.40791320800781, 5.961460113525391, 58.332664489746094, 32.58086395263672, -1.6673049926757812, 56.18377685546875, 24.984445571899414, 5.350929260253906, 59.104034423828125, -7.0157623291015625, -15.752334594726562, 77.5628662109375, 14.88133430480957, 81.17184448242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000276.npy"}
|
|
{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 24.34532928466797, "std": 19.181516647338867, "min": -7.5345458984375, "p10": 3.687131690979004, "median": 19.123876571655273, "p90": 52.39681053161621, "max": 82.7918701171875, "pos_frac": 0.9375, "sample": [11.456230163574219, 28.641056060791016, 3.5633506774902344, 19.697906494140625, 12.278209686279297, 36.22100067138672, 35.2236328125, 30.26581382751465, 7.1704864501953125, 38.412841796875, 25.347259521484375, 7.754796981811523, 42.625465393066406, -1.7754135131835938, 68.79217529296875, 8.429031372070312, 3.975954055786133, 11.108383178710938, 35.806373596191406, 14.519355773925781, 17.362979888916016, 29.885726928710938, 12.451362609863281, 35.131385803222656, -7.5345458984375, 26.381683349609375, 57.14491271972656, 41.417022705078125, 9.793174743652344, 24.146270751953125, -0.44715118408203125, 82.7918701171875, 10.424606323242188, 53.45660400390625, 7.942512512207031, 7.890438079833984, 49.11711883544922, 35.75077819824219, 16.220348358154297, 8.433746337890625, 38.82197570800781, 30.857704162597656, 58.850372314453125, 13.122905731201172, 1.9441909790039062, 18.549846649169922, 17.839263916015625, 52.415836334228516, 32.03565979003906, 38.88933563232422, -2.2039794921875, 45.22306823730469, 26.017847061157227, 1.9530391693115234, 10.929161071777344, 4.065105438232422, 19.906150817871094, 14.911758422851562, 52.3524169921875, 14.873245239257812, 28.65604019165039, 59.21185302734375, 10.300256729125977, 11.30316162109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000277.npy"}
|
|
{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 29.894882202148438, "std": 26.637422561645508, "min": -7.059478759765625, "p10": -1.9712816238403312, "median": 24.126537322998047, "p90": 67.29287185668946, "max": 94.26127624511719, "pos_frac": 0.875, "sample": [7.785501480102539, 68.02751159667969, 1.5127067565917969, 23.238346099853516, 22.30810546875, 40.51075744628906, 23.056838989257812, 17.836627960205078, 57.47923278808594, 24.294998168945312, 6.20220947265625, 30.109298706054688, 32.57958984375, -2.280517578125, 63.96656799316406, 23.054046630859375, -2.7885093688964844, 65.5787124633789, 69.22421264648438, -5.2517242431640625, 19.615507125854492, 93.5772705078125, 55.888427734375, 21.07872772216797, 35.24510955810547, 58.16688537597656, 39.91254425048828, 1.8573970794677734, 23.95807647705078, 91.48892974853516, 71.63671112060547, -6.987937927246094, 32.768836975097656, -5.729461669921875, 62.204742431640625, 57.40221405029297, 7.08319091796875, 6.64801025390625, 1.7829418182373047, 48.48305892944336, 17.27700424194336, 94.26127624511719, 22.830785751342773, -7.059478759765625, 14.585189819335938, 71.77603912353516, 13.580986022949219, 29.906631469726562, 28.47890853881836, 42.42030334472656, 24.348979949951172, 24.45368766784668, 12.816486358642578, 3.2158126831054688, 36.52904510498047, 53.21549987792969, -3.0003490447998047, -1.2497310638427734, 52.480743408203125, 2.947509765625, 50.17509078979492, 35.52595520019531, 5.9857330322265625, 5.244565963745117], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000278.npy"}
|
|
{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 29.428205490112305, "std": 24.46150016784668, "min": -15.758121490478516, "p10": -2.6836544036865226, "median": 25.190579414367676, "p90": 61.04395980834961, "max": 83.1346206665039, "pos_frac": 0.84375, "sample": [5.584712982177734, 11.74264907836914, 18.383487701416016, 27.325271606445312, -7.908416748046875, -4.574981689453125, 64.70304870605469, 59.63262939453125, 18.786911010742188, 5.552082061767578, -0.0669708251953125, 45.66423034667969, 36.82952117919922, 24.287519454956055, 69.5130615234375, 61.62812805175781, 5.808704376220703, 24.564788818359375, 59.68090057373047, 20.303878784179688, 35.073211669921875, 42.05718231201172, 14.620977401733398, -2.9467391967773438, 13.749210357666016, 25.575571060180664, 24.805587768554688, 38.70781707763672, -1.506195068359375, 83.1346206665039, 17.1292724609375, 44.283843994140625, 46.704345703125, 29.92267417907715, 44.25089645385742, 54.50499725341797, 15.514015197753906, 80.800048828125, 63.20375442504883, 47.613426208496094, 8.700538635253906, -3.6002731323242188, 5.146337509155273, -4.90655517578125, 14.934900283813477, 56.263763427734375, 20.334346771240234, 39.544776916503906, 40.074485778808594, 16.515369415283203, 52.25187683105469, 45.98731994628906, 77.4427490234375, -15.758121490478516, 57.23698425292969, 4.841835021972656, 42.672630310058594, 21.833023071289062, -3.7608070373535156, 6.095420837402344, 44.45280456542969, 43.40995788574219, -2.0697898864746094, 51.12186813354492], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000279.npy"}
|
|
{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 28.934520721435547, "std": 18.74156379699707, "min": -6.62939453125, "p10": 2.055599212646486, "median": 30.226879119873047, "p90": 50.63310089111329, "max": 76.24617004394531, "pos_frac": 0.90625, "sample": [26.580703735351562, 35.34193420410156, 14.526084899902344, 38.76576232910156, 14.613258361816406, 1.3856735229492188, 41.67793273925781, 34.73548889160156, 53.092132568359375, 7.38140869140625, 37.14421081542969, 23.202789306640625, 10.90420913696289, 34.26268768310547, 51.03028869628906, 44.01527404785156, 9.676168441772461, 76.24617004394531, 25.847999572753906, 34.44377136230469, -0.9027538299560547, 66.05604553222656, 60.9434700012207, 38.1644172668457, 43.506141662597656, 22.529582977294922, 27.939743041992188, 37.48335266113281, 5.60986328125, 52.38963317871094, 49.706329345703125, 34.696495056152344, 39.181365966796875, 35.91777801513672, -4.045654296875, 14.282835006713867, 26.269912719726562, 71.18549346923828, 19.925552368164062, 48.11559295654297, 28.707138061523438, 3.6187591552734375, -6.5164794921875, 30.751617431640625, -2.276700973510742, -6.62939453125, 23.37335205078125, 35.613746643066406, 15.400711059570312, 35.56690979003906, 39.15865707397461, 46.841796875, 31.12880516052246, 28.020734786987305, -0.4383049011230469, 6.722282409667969, 19.53863525390625, 24.615707397460938, 25.875946044921875, 42.180938720703125, 44.99161911010742, 12.909683227539062, 39.12194061279297, 29.70214080810547], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000280.npy"}
|
|
{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 25.293540954589844, "std": 20.211898803710938, "min": -41.6973876953125, "p10": 3.5543533325195327, "median": 22.929208755493164, "p90": 50.643093872070324, "max": 82.52790832519531, "pos_frac": 0.9375, "sample": [15.870079040527344, 68.3391342163086, 26.05425262451172, 40.823272705078125, 21.858882904052734, 9.239913940429688, 10.048812866210938, 21.674175262451172, 29.41005516052246, 29.537065505981445, 29.555397033691406, 45.8209228515625, 41.69766616821289, 9.867616653442383, 35.29371643066406, -4.5254974365234375, 31.998809814453125, 47.66998291015625, 11.997840881347656, 20.764602661132812, 34.04314422607422, 20.340057373046875, 31.843704223632812, 33.991920471191406, 15.601806640625, 23.999534606933594, 41.16786193847656, 63.808448791503906, -3.8612403869628906, 46.77635955810547, 51.775779724121094, 13.241357803344727, 26.557889938354492, 55.08457946777344, 13.918014526367188, -41.6973876953125, 17.592723846435547, 16.34920883178711, 37.36628723144531, 12.272571563720703, 82.52790832519531, 14.701522827148438, 5.2716217041015625, 24.28582000732422, 30.473003387451172, 59.244606018066406, 12.7633056640625, 17.275527954101562, 44.26875305175781, 39.05131530761719, 10.291160583496094, 26.334564208984375, -2.9491729736328125, 2.9751129150390625, 4.905914306640625, 15.708786010742188, 0.48482704162597656, 52.926116943359375, 29.401412963867188, 11.52186393737793, 14.035318374633789, 20.02996826171875, 2.0878353118896484, 48.000160217285156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000281.npy"}
|
|
{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 23.41071128845215, "std": 20.938098907470703, "min": -7.799657821655273, "p10": 2.514866828918457, "median": 22.222238540649414, "p90": 47.915639877319336, "max": 117.37347412109375, "pos_frac": 0.921875, "sample": [47.8800048828125, 6.143756866455078, 11.262611389160156, 6.712242126464844, 3.8667354583740234, 54.040626525878906, 25.3247127532959, 21.48503875732422, 7.822479248046875, 30.65453338623047, 47.930912017822266, 8.059188842773438, 64.7446060180664, 18.327180862426758, 6.456031799316406, 3.2026329040527344, 117.37347412109375, 17.492889404296875, 11.665023803710938, 25.14141845703125, 13.7554931640625, 29.705245971679688, 22.737743377685547, -0.552764892578125, 49.51835632324219, 58.83075714111328, 32.43724060058594, 65.28611755371094, 39.772464752197266, 31.292556762695312, 40.57150650024414, 0.5631599426269531, 32.31621551513672, -6.948308944702148, 24.26980209350586, 27.972782135009766, 2.5384521484375, 23.90148162841797, 13.090744018554688, 19.03350830078125, -7.51641845703125, 25.86650848388672, 44.47351837158203, 30.491348266601562, 43.45787048339844, 15.630325317382812, 5.204498291015625, 25.537532806396484, 28.933372497558594, 12.947898864746094, 2.504758834838867, 23.861427307128906, 25.64655303955078, 21.70673370361328, 22.79743194580078, 5.48150634765625, 18.4498291015625, 13.044347763061523, -7.799657821655273, 39.60932159423828, 9.796714782714844, 11.061355590820312, -2.2293777465820312, 33.64945983886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000282.npy"}
|
|
{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 28.11335563659668, "std": 23.548233032226562, "min": -20.06414794921875, "p10": 1.1320089340209976, "median": 23.23712158203125, "p90": 59.86843490600587, "max": 92.87501525878906, "pos_frac": 0.90625, "sample": [8.051921844482422, 15.462547302246094, 29.424713134765625, 2.5974082946777344, 22.425491333007812, 68.64222717285156, 16.577117919921875, 25.639801025390625, 81.92022705078125, -6.277702331542969, -2.8098068237304688, 42.04200744628906, -16.724796295166016, 19.265655517578125, 10.618026733398438, 11.116004943847656, -20.06414794921875, 18.10580825805664, 19.755722045898438, 7.623889923095703, 92.87501525878906, 54.31073760986328, 14.616539001464844, 52.00151062011719, 20.438396453857422, 24.048751831054688, -0.8374500274658203, 57.91706848144531, 21.00098419189453, 64.67784118652344, 22.363189697265625, 32.044700622558594, 43.324134826660156, 21.275039672851562, 8.56903076171875, 5.828086853027344, 42.989471435546875, 10.15439224243164, 10.735322952270508, 0.5039806365966797, 25.771507263183594, 56.42485046386719, 45.43523406982422, 15.572637557983398, 43.61298370361328, 28.744831085205078, 39.098365783691406, 20.17768096923828, 53.89324951171875, 28.422874450683594, 43.668365478515625, 24.237648010253906, 44.98359680175781, 16.406375885009766, 15.081184387207031, 46.78902053833008, 35.19001770019531, 28.29975128173828, 31.316116333007812, 65.4822998046875, 75.17605590820312, 13.000015258789062, 60.704734802246094, -10.463541030883789], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000283.npy"}
|
|
{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 29.832664489746094, "std": 25.786720275878906, "min": -16.82473373413086, "p10": 2.822159576416017, "median": 24.1427001953125, "p90": 65.36373291015626, "max": 116.6048583984375, "pos_frac": 0.921875, "sample": [-2.51507568359375, 15.300451278686523, 44.109161376953125, 18.48926544189453, 23.835983276367188, 7.5369110107421875, 46.750587463378906, 19.041015625, 19.806785583496094, 56.08306884765625, 77.04557800292969, 11.31417465209961, 72.87619018554688, 10.415185928344727, 21.386802673339844, 11.48154067993164, 19.216686248779297, 32.848114013671875, 25.781333923339844, 84.88404846191406, -0.780517578125, 83.08666229248047, 33.117408752441406, 49.660888671875, 16.893985748291016, 23.97838592529297, 37.54359436035156, 36.449485778808594, 40.24742126464844, -4.530113220214844, 0.6583099365234375, 116.6048583984375, 14.760881423950195, 5.77281379699707, 2.2793502807617188, 41.188636779785156, -0.4029693603515625, 21.998313903808594, 24.30701446533203, 91.96270751953125, 65.53076171875, 9.84478759765625, 33.3004150390625, 24.7862606048584, 6.961235046386719, -16.82473373413086, 23.60240936279297, 64.9739990234375, 15.497283935546875, 26.93720817565918, 56.72218322753906, 26.332542419433594, 5.150230407714844, 50.16316223144531, 30.56112289428711, 10.911062240600586, 19.312301635742188, 4.088714599609375, 49.14697265625, 43.45785903930664, 6.949832916259766, 45.668182373046875, 31.11215591430664, 24.619674682617188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000284.npy"}
|
|
{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 26.457977294921875, "std": 21.147918701171875, "min": -11.735397338867188, "p10": 2.6115470886230474, "median": 24.644168853759766, "p90": 49.71286468505861, "max": 94.62911987304688, "pos_frac": 0.953125, "sample": [38.514320373535156, 23.131061553955078, 13.552196502685547, 45.744720458984375, 9.878952026367188, 25.775970458984375, 36.977134704589844, 6.5964813232421875, 44.64468002319336, 34.194244384765625, 38.662078857421875, 32.47928237915039, 39.29377746582031, 6.766761779785156, 36.424095153808594, 77.13966369628906, 5.751472473144531, 2.370513916015625, 60.246307373046875, 61.536224365234375, 29.698387145996094, 7.407249450683594, 16.60192108154297, 28.227256774902344, 34.76154327392578, 8.706413269042969, 17.559490203857422, 38.65263366699219, 22.964202880859375, 3.2066726684570312, 33.174110412597656, 0.13791656494140625, -7.522743225097656, 9.953998565673828, 40.01911926269531, 77.57769775390625, 15.38571548461914, -11.735397338867188, 32.93841552734375, 8.09326171875, 21.733732223510742, 5.588775634765625, 94.62911987304688, 35.001495361328125, 0.25093841552734375, 59.02497863769531, 33.087364196777344, 20.987834930419922, 40.112876892089844, 51.41349792480469, 3.1739578247070312, 7.317173004150391, 25.384963989257812, 17.56048583984375, 1.49395751953125, 11.291202545166016, 42.85215759277344, -1.0024642944335938, 19.46558380126953, 18.369064331054688, 28.31035614013672, 44.29877471923828, 43.57347106933594, 23.90337371826172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000285.npy"}
|
|
{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 27.725364685058594, "std": 25.633079528808594, "min": -27.53948211669922, "p10": -1.8357982635498047, "median": 25.327802658081055, "p90": 63.56953430175781, "max": 109.6821517944336, "pos_frac": 0.859375, "sample": [14.88812255859375, -0.89605712890625, 109.6821517944336, 25.658153533935547, 14.345996856689453, 33.22942352294922, 8.185127258300781, 17.891006469726562, 37.284202575683594, 11.231475830078125, 38.973880767822266, 43.351470947265625, 9.54583740234375, 8.354990005493164, 32.45000457763672, 8.747268676757812, 72.82965087890625, 1.5756759643554688, 24.507781982421875, 46.17564010620117, 80.49715423583984, 14.721492767333984, -27.53948211669922, 68.19483947753906, 24.805068969726562, -14.650360107421875, -1.8462409973144531, 45.38847351074219, 20.24078369140625, 54.27162170410156, 63.782257080078125, 3.4448089599609375, 30.73078155517578, 48.0643310546875, 24.997451782226562, 47.0609245300293, 31.683761596679688, 52.32743835449219, 9.149856567382812, 2.7277984619140625, 35.87110900878906, 12.688468933105469, 33.283447265625, -4.8521881103515625, -3.641143798828125, 28.694156646728516, 58.026527404785156, 23.00705909729004, -2.7880802154541016, 71.39314270019531, 4.640697479248047, 23.049880981445312, 2.506959915161133, 26.87957763671875, 33.59052276611328, 63.07318115234375, -3.906097412109375, 28.469745635986328, 23.21652603149414, -1.811431884765625, 61.13905334472656, 65.24282836914062, 26.346954345703125, 34.23784637451172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000286.npy"}
|
|
{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 26.86972427368164, "std": 25.97897720336914, "min": -17.612552642822266, "p10": 1.4858345031738291, "median": 18.39208984375, "p90": 60.12953605651856, "max": 112.10391235351562, "pos_frac": 0.90625, "sample": [56.275352478027344, 18.3294677734375, 14.869543075561523, 6.812858581542969, 26.140409469604492, 12.449142456054688, 52.99696350097656, 87.89054870605469, 45.3304328918457, 11.095855712890625, 61.34242248535156, 24.934961318969727, 4.117837905883789, 18.4547119140625, 6.873048782348633, -0.8361968994140625, 46.13102722167969, 19.812240600585938, 51.45808410644531, 33.29139709472656, 9.597370147705078, 3.9255943298339844, 44.280067443847656, -12.177886962890625, 112.10391235351562, 14.018383026123047, 18.161779403686523, 25.786590576171875, 31.145416259765625, 1.0771408081054688, 27.7557373046875, 47.534698486328125, -2.6238632202148438, 3.5701446533203125, 15.526386260986328, 39.182464599609375, 13.097404479980469, 33.88426208496094, 6.643245697021484, 16.076522827148438, 7.634927749633789, 15.189537048339844, 3.8988113403320312, 19.64072608947754, 8.473464965820312, 40.17707061767578, 2.439453125, 8.122329711914062, 80.27330017089844, 18.163726806640625, -11.687057495117188, 11.978103637695312, 60.79005813598633, 46.35133361816406, 49.769683837890625, -1.9252185821533203, -17.612552642822266, 30.97671127319336, 5.452934265136719, 41.131141662597656, 71.69679260253906, 78.71151733398438, 45.09182357788086, 58.58831787109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000287.npy"}
|
|
{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 27.80404281616211, "std": 21.588449478149414, "min": -10.430191040039062, "p10": 0.9680755615234381, "median": 24.7489013671875, "p90": 58.68895874023438, "max": 77.08822631835938, "pos_frac": 0.90625, "sample": [42.943321228027344, 33.6964111328125, -9.830127716064453, 34.16639709472656, 43.471527099609375, 66.44615173339844, -4.200315475463867, 73.76081848144531, 7.517604827880859, 55.27589797973633, 11.292610168457031, 25.0731201171875, 2.991729736328125, 17.855979919433594, 16.712711334228516, 27.822792053222656, 40.30011749267578, 27.799301147460938, 10.86016845703125, 36.57148742675781, 21.87436294555664, 48.147308349609375, 24.4246826171875, 11.787864685058594, 39.86860275268555, 33.829017639160156, 64.9564208984375, 15.917535781860352, 21.295257568359375, 46.97882080078125, 15.378379821777344, 10.13333511352539, 45.285682678222656, 40.73048400878906, 40.348968505859375, 32.31626892089844, 16.371536254882812, 3.3149871826171875, 31.87244415283203, 10.110836029052734, -6.699821472167969, 77.08822631835938, 16.639026641845703, 32.19758605957031, -1.6305465698242188, 24.012733459472656, 22.300003051757812, 9.245361328125, 71.54544067382812, 23.423744201660156, 56.03919982910156, 45.3243408203125, 34.85813903808594, 1.5783538818359375, 13.963520050048828, 62.58198547363281, 31.063873291015625, -1.94403076171875, 10.909236907958984, 54.39569091796875, 0.7065277099609375, 59.82456970214844, -10.430191040039062, 16.99525260925293], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000288.npy"}
|
|
{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 30.28182601928711, "std": 29.362388610839844, "min": -13.456769943237305, "p10": 1.5122701644897463, "median": 24.460418701171875, "p90": 73.26582870483402, "max": 115.80284118652344, "pos_frac": 0.90625, "sample": [27.581722259521484, 24.37823486328125, 82.82879638671875, 93.83366394042969, 4.158674240112305, 55.04747009277344, 47.44062805175781, 10.270034790039062, 22.841262817382812, 76.89718627929688, 5.7475738525390625, 37.35865783691406, 29.660388946533203, 108.11963653564453, 62.498931884765625, 47.085750579833984, 34.75776672363281, 98.66001892089844, -0.45201873779296875, 41.016319274902344, 17.226125717163086, 3.4049434661865234, 33.385101318359375, 10.132152557373047, 2.454465866088867, 33.60587692260742, 47.602596282958984, 28.214065551757812, 13.844673156738281, 9.747987747192383, 46.37474060058594, 65.67088317871094, 41.799278259277344, 1.583749771118164, 5.784934997558594, 5.950447082519531, 115.80284118652344, 7.536670684814453, 1.4816360473632812, -4.008930206298828, 23.30004119873047, 52.90167236328125, 1.9136428833007812, 46.076148986816406, -4.140459060668945, 45.36558532714844, 76.52080535888672, 30.309778213500977, 18.597230911254883, -8.476062774658203, 11.438827514648438, 24.5426025390625, -13.456769943237305, 4.088552474975586, 47.148193359375, 19.366485595703125, 7.157197952270508, 12.285484313964844, 2.6164398193359375, 38.55750274658203, 51.950103759765625, -3.0475921630859375, 39.34112548828125, 16.35541534423828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000289.npy"}
|
|
{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 31.53160858154297, "std": 29.577234268188477, "min": -17.622474670410156, "p10": -0.9667409896850552, "median": 27.486618041992188, "p90": 65.35994110107423, "max": 134.90945434570312, "pos_frac": 0.890625, "sample": [39.851898193359375, 27.346088409423828, 28.85039520263672, 22.21839141845703, -14.391975402832031, 26.575775146484375, 71.28742980957031, -4.651008605957031, 25.350112915039062, 9.598655700683594, 41.453941345214844, 57.676612854003906, 15.449005126953125, 9.790557861328125, 29.762168884277344, 45.83139419555664, 58.73046875, 28.93714141845703, 13.182159423828125, 28.002918243408203, 5.123687744140625, 35.09388732910156, -17.622474670410156, 40.593814849853516, 8.961524963378906, -14.289924621582031, 13.722515106201172, 27.02367401123047, 92.247802734375, 93.97775268554688, 2.404672622680664, 84.27133178710938, 41.79753112792969, 53.91371154785156, 38.16654968261719, -5.543405532836914, 58.828277587890625, 8.427810668945312, 30.15900421142578, 14.673233032226562, 18.5128173828125, 39.345848083496094, 28.117233276367188, 44.78582763671875, 8.703117370605469, 61.89875030517578, 18.14537811279297, 4.433937072753906, 13.818412780761719, -2.411632537841797, 64.94783020019531, 65.53656005859375, 16.757783889770508, 55.803131103515625, 8.638023376464844, 90.2716064453125, 59.93951416015625, 27.627147674560547, 6.880283355712891, -4.545894622802734, 4.869037628173828, 134.90945434570312, 60.892913818359375, 17.362831115722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000290.npy"}
|
|
{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 27.701282501220703, "std": 22.96601104736328, "min": -23.757659912109375, "p10": 3.5265132904052745, "median": 25.02838134765625, "p90": 62.42000961303711, "max": 78.1318359375, "pos_frac": 0.953125, "sample": [7.079593658447266, 23.017845153808594, 26.9573974609375, 30.256738662719727, 1.08746337890625, 61.23486328125, 78.1318359375, 34.704994201660156, 17.176467895507812, 62.49060821533203, 22.04608154296875, 50.64453125, 67.11943817138672, 9.703453063964844, 41.381568908691406, 18.838714599609375, 64.38252258300781, 47.50257873535156, 19.247220993041992, 20.285171508789062, 26.5379638671875, 12.931510925292969, 31.320287704467773, 10.862640380859375, 49.539947509765625, 62.255279541015625, 13.89459228515625, 69.32884216308594, 35.577598571777344, 1.48162841796875, 20.487716674804688, 9.232742309570312, 47.15741729736328, 2.9619293212890625, 28.653182983398438, -20.13592529296875, 7.238960266113281, 28.48046875, -23.757659912109375, 30.893157958984375, 7.26068115234375, 47.385589599609375, 51.84901428222656, 32.04357147216797, 9.772586822509766, 2.2553043365478516, -9.117986679077148, 42.94291687011719, 9.17452621459961, 32.26427459716797, 4.843875885009766, 10.86773681640625, 76.02719116210938, 19.01476287841797, 5.907127380371094, 29.6837158203125, 38.002777099609375, 26.178802490234375, 59.4681396484375, 20.921783447265625, 11.719413757324219, 23.877960205078125, 65.97709655761719, 6.329751968383789], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000291.npy"}
|
|
{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 29.846210479736328, "std": 24.613697052001953, "min": -8.015777587890625, "p10": 2.3450508117675786, "median": 25.066513061523438, "p90": 66.46574211120607, "max": 102.83447265625, "pos_frac": 0.921875, "sample": [32.96112060546875, 54.40960693359375, 14.789487838745117, 25.13549041748047, 2.105010986328125, 13.685523986816406, 11.4080810546875, 26.16834259033203, 40.213165283203125, -8.015777587890625, 38.465667724609375, 12.043441772460938, 55.48936462402344, 25.358596801757812, 19.13330078125, 10.579988479614258, 7.950950622558594, 94.11429595947266, 15.634185791015625, 30.035667419433594, -2.2073974609375, 74.8040771484375, 35.8939208984375, 44.75616455078125, 51.36126708984375, 102.83447265625, 63.34990310668945, 43.03892517089844, 20.844329833984375, 17.3773193359375, -1.1306610107421875, 18.1658935546875, 7.2495574951171875, 19.21917724609375, 2.9051437377929688, 13.730295181274414, 3.5580368041992188, 46.248626708984375, 24.997535705566406, 8.48065185546875, 17.893829345703125, 18.14780044555664, 38.51807403564453, 50.27299880981445, 11.104843139648438, 49.28810119628906, 44.01164245605469, 54.815879821777344, 72.31413269042969, 33.39410400390625, 7.203102111816406, 15.36578369140625, 39.94493865966797, -1.2570533752441406, 72.20927429199219, 11.704795837402344, 68.83366394042969, 26.975040435791016, 0.24353790283203125, 55.335655212402344, 67.80110168457031, 12.516735076904297, 27.956588745117188, -1.5799694061279297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000292.npy"}
|
|
{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 24.17245864868164, "std": 24.482250213623047, "min": -23.84355926513672, "p10": -0.8667945861816395, "median": 16.647586822509766, "p90": 60.89103240966798, "max": 91.95664978027344, "pos_frac": 0.890625, "sample": [11.172674179077148, 17.4056396484375, 13.55000114440918, 2.4453811645507812, 36.71217346191406, 23.33987045288086, 41.75286865234375, 28.382850646972656, 67.6089096069336, 6.463010787963867, 31.04344940185547, 2.5356903076171875, 13.922916412353516, -1.3410148620605469, 68.95881652832031, 1.310546875, 6.369232177734375, 57.14690399169922, -7.8769989013671875, 29.66699981689453, 43.52940368652344, -23.84355926513672, 34.24149703979492, 0.23971939086914062, 3.541238784790039, 8.457862854003906, -2.469390869140625, 9.847904205322266, 15.321674346923828, 53.34983825683594, 62.29582214355469, 21.25177001953125, 29.826765060424805, 30.183517456054688, 44.68673324584961, 7.094099044799805, 13.699211120605469, 28.18069076538086, 22.773353576660156, 8.874666213989258, 15.889533996582031, 14.021049499511719, 10.170913696289062, 37.828582763671875, 42.6539306640625, 57.613189697265625, 34.977142333984375, 76.08396911621094, 21.939098358154297, -4.5003814697265625, 12.345359802246094, -2.9459266662597656, 2.1090240478515625, 83.29667663574219, 91.95664978027344, 18.659786224365234, 11.209770202636719, 39.19528579711914, 7.24195671081543, 10.21429443359375, 5.3245849609375, 39.50017547607422, 71.90665435791016, -11.306682586669922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000293.npy"}
|
|
{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 27.206314086914062, "std": 26.069358825683594, "min": -9.712003707885742, "p10": -2.123927307128905, "median": 22.06829261779785, "p90": 63.07852020263673, "max": 116.46939086914062, "pos_frac": 0.859375, "sample": [35.44579315185547, 21.308204650878906, 36.5826416015625, 30.96487808227539, 17.066329956054688, 23.53872299194336, -0.7746505737304688, 18.05683135986328, 2.1463394165039062, 56.08283996582031, -1.1758575439453125, 13.100601196289062, 4.903507232666016, 60.76416015625, -3.1717376708984375, 15.125541687011719, 23.078781127929688, 75.33441162109375, 18.504596710205078, 34.24360656738281, 72.71800231933594, 0.3310089111328125, 18.423828125, 13.807138442993164, 90.86468505859375, 47.536285400390625, 31.136917114257812, 8.166162490844727, 36.74768829345703, -9.650581359863281, -5.8706512451171875, 58.70075988769531, 33.30252456665039, 79.142333984375, -9.712003707885742, 1.1448326110839844, 116.46939086914062, 22.578487396240234, 24.90369415283203, 20.709144592285156, -2.530242919921875, 18.719268798828125, 43.419410705566406, 56.971824645996094, 52.35334777832031, 33.22547912597656, 8.45156478881836, 14.921524047851562, 64.07038879394531, 2.936056137084961, 30.209033966064453, 67.90167236328125, 17.64417266845703, 22.259437561035156, 21.877147674560547, 23.766281127929688, 14.39471435546875, 37.25001525878906, 8.341554641723633, -4.55650520324707, 23.85308837890625, -7.6695709228515625, 14.226760864257812, 46.59254455566406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000294.npy"}
|
|
{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 27.146446228027344, "std": 25.158411026000977, "min": -43.99073791503906, "p10": 1.136109733581543, "median": 22.7630615234375, "p90": 60.93475952148438, "max": 90.85751342773438, "pos_frac": 0.90625, "sample": [9.219537734985352, 20.195531845092773, 68.59066009521484, -3.930723190307617, -11.602890014648438, 55.018638610839844, 16.239582061767578, 26.179244995117188, 33.450103759765625, 17.696243286132812, 57.76734924316406, 14.411445617675781, 22.97795867919922, 47.138282775878906, 47.6109619140625, 69.63851165771484, 29.582046508789062, -5.178752899169922, 3.507793426513672, 9.949760437011719, 53.106266021728516, 15.179697036743164, 33.14961242675781, 14.927227020263672, 1.2849102020263672, 4.80657958984375, 28.048324584960938, 1.0723381042480469, 53.271728515625, 17.425628662109375, 25.008453369140625, 1.9947052001953125, -6.546154022216797, -43.99073791503906, 38.16424560546875, 69.76380920410156, 2.5818405151367188, 37.587059020996094, 19.641563415527344, 61.31636047363281, 50.92680740356445, 22.555313110351562, 32.593170166015625, 1.455535888671875, 4.848270416259766, 10.726303100585938, 46.74415588378906, 39.88587951660156, 38.89234924316406, 62.88203430175781, 22.28406524658203, 21.146514892578125, 22.646270751953125, 90.85751342773438, 45.59608459472656, 60.04435729980469, 25.482738494873047, 57.75726318359375, 22.879852294921875, 11.191726684570312, -4.237005233764648, 16.120561599731445, 76.23037719726562, 1.6075897216796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000295.npy"}
|
|
{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 23.47943878173828, "std": 26.585098266601562, "min": -24.808998107910156, "p10": -4.5889438629150385, "median": 18.705669403076172, "p90": 59.79341201782227, "max": 101.13671875, "pos_frac": 0.8125, "sample": [-3.862964630126953, 19.564125061035156, 18.526885986328125, 101.13671875, -12.652740478515625, 42.69024658203125, 4.30328369140625, -3.0855255126953125, 3.281463623046875, -24.808998107910156, 33.09466552734375, 2.8697357177734375, 11.800399780273438, 42.71415710449219, 15.462570190429688, -5.8051300048828125, 1.6688556671142578, 43.000640869140625, 79.15245056152344, 20.61474609375, 21.82086181640625, 4.052177429199219, -16.801406860351562, 23.788516998291016, 31.17620086669922, 60.16436767578125, -0.342987060546875, 8.682052612304688, 6.066123962402344, 12.864627838134766, 58.92784881591797, 34.927406311035156, 26.929813385009766, 35.645545959472656, -0.2759552001953125, 51.56201934814453, 56.22581100463867, -4.900077819824219, -21.474721908569336, 54.37364959716797, 12.794269561767578, 62.32050323486328, -7.006626129150391, 11.742948532104492, 73.49020385742188, 21.49262809753418, 35.491943359375, 37.60576629638672, 4.728324890136719, 15.180276870727539, 17.784286499023438, 7.6967620849609375, 12.726577758789062, 73.1053466796875, 32.335601806640625, 19.6021728515625, 17.139205932617188, 84.02251434326172, 51.76366424560547, 5.834882736206055, 37.17488098144531, 18.88445281982422, -3.06298828125, 26.75897216796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000296.npy"}
|
|
{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 28.571863174438477, "std": 23.788049697875977, "min": -8.700481414794922, "p10": -0.4197135925292954, "median": 21.970603942871094, "p90": 69.00554275512697, "max": 86.12737274169922, "pos_frac": 0.890625, "sample": [20.454498291015625, 20.78729248046875, 64.17095947265625, 63.579017639160156, 27.87027359008789, 20.740625381469727, 63.68324279785156, 44.90374755859375, 13.196653366088867, -8.700481414794922, 24.04789924621582, 49.7204475402832, 45.566612243652344, 32.847564697265625, 71.34536743164062, 15.038175582885742, 18.493667602539062, 23.654632568359375, 20.482940673828125, -2.426698684692383, 9.315017700195312, 27.48661994934082, 12.021329879760742, 11.723356246948242, 74.00090026855469, -4.2654571533203125, 74.57288360595703, 6.418479919433594, 42.083892822265625, 21.447235107421875, 32.63086700439453, 37.70554733276367, 39.674346923828125, 41.31926345825195, 22.493972778320312, 12.012752532958984, 46.750091552734375, 14.232254028320312, 82.78335571289062, 25.89190673828125, 22.499244689941406, 20.53563690185547, 18.795059204101562, 41.079742431640625, -5.208806991577148, 0.9890823364257812, 16.660472869873047, -2.8780059814453125, 86.12737274169922, 8.347705841064453, 10.722513198852539, 71.07750701904297, 31.0606689453125, 6.162567138671875, -8.690147399902344, 8.782981872558594, 32.82467269897461, -1.0234832763671875, 73.55662536621094, 51.640350341796875, 11.63876724243164, 21.442413330078125, 20.88235092163086, 31.81888771057129], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000297.npy"}
|
|
{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 29.80324935913086, "std": 26.625171661376953, "min": -7.7118377685546875, "p10": 2.6134834289550786, "median": 22.148807525634766, "p90": 70.22258911132813, "max": 129.1201171875, "pos_frac": 0.921875, "sample": [30.474027633666992, 18.299293518066406, 16.646930694580078, 7.9954681396484375, 6.0292510986328125, -0.65997314453125, -0.6682243347167969, 28.31842041015625, 14.306419372558594, 14.517868041992188, 17.014999389648438, 28.906639099121094, 22.62890625, 3.1578216552734375, 8.995635986328125, 19.467567443847656, 17.88994598388672, 39.54578399658203, 14.839591979980469, 17.243064880371094, 71.009765625, -4.321044921875, 41.96996307373047, 4.042552947998047, 73.23273468017578, 71.00227355957031, 12.531272888183594, 25.907684326171875, 56.0238037109375, 74.50736236572266, 14.040502548217773, -5.966104507446289, 13.416595458984375, 3.3498706817626953, 4.0664520263671875, 89.95410919189453, 69.77976989746094, 37.70594787597656, 64.80718994140625, 34.05663299560547, 21.42207145690918, 1.6790218353271484, 129.1201171875, 24.8758544921875, 33.95695495605469, 22.35076904296875, 38.80012512207031, 58.09370040893555, 21.948505401611328, 16.72248077392578, 12.9683837890625, 58.16387939453125, 70.41236877441406, 2.3801956176757812, 57.726318359375, 36.777801513671875, 39.230918884277344, 53.17778015136719, -7.7118377685546875, 17.1466064453125, 12.422698974609375, 28.3260498046875, 22.349109649658203, 59.001373291015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000298.npy"}
|
|
{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 31.181495666503906, "std": 28.80525779724121, "min": -16.721725463867188, "p10": -0.9073417663574218, "median": 28.28982162475586, "p90": 71.31781692504886, "max": 108.98564147949219, "pos_frac": 0.875, "sample": [-6.166229248046875, -6.36602783203125, -0.925750732421875, 33.79985809326172, 5.968650817871094, -6.007038116455078, 49.066017150878906, 30.0233154296875, 2.653942108154297, -8.733501434326172, 75.84851837158203, 44.87995147705078, 108.98564147949219, 62.537925720214844, 34.64670944213867, 107.93438720703125, -16.721725463867188, 34.379539489746094, 18.676801681518555, 21.985794067382812, 7.091526031494141, 50.558074951171875, -0.8643875122070312, 34.89929962158203, 17.635643005371094, 24.514495849609375, 75.08062744140625, 44.37410354614258, 5.807628631591797, 3.5609207153320312, 13.722715377807617, 22.16851043701172, 43.45576477050781, 55.828819274902344, 38.648956298828125, 82.4830551147461, 25.184860229492188, 59.01324462890625, 18.110061645507812, 19.09740447998047, 15.990898132324219, 1.4248886108398438, 41.72767639160156, 5.301490783691406, 60.055015563964844, 17.27239227294922, -6.360467910766602, 79.99407958984375, 38.619232177734375, 56.43292236328125, 6.9226531982421875, 0.4398040771484375, 28.319374084472656, 50.774749755859375, 26.41793441772461, 33.75963592529297, 28.260269165039062, 103.16999816894531, 41.14364242553711, 44.26061248779297, 3.024078369140625, 33.16710662841797, 13.127321243286133, 45.53227233886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000299.npy"}
|
|
{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 29.6212158203125, "std": 29.87994956970215, "min": -30.886730194091797, "p10": -8.315813827514646, "median": 26.344436645507812, "p90": 76.24153747558594, "max": 90.61348724365234, "pos_frac": 0.828125, "sample": [9.7535400390625, 54.41740417480469, 52.451744079589844, 60.313720703125, 9.361114501953125, -14.52752685546875, 14.094718933105469, 76.30490112304688, 48.52039337158203, -10.196155548095703, -30.886730194091797, 16.479782104492188, 55.28107833862305, 28.158721923828125, 18.140342712402344, 29.60742950439453, 14.748222351074219, 17.357009887695312, 4.171756744384766, 6.480701446533203, 1.7303009033203125, 38.44371795654297, 22.18170166015625, -1.8777084350585938, 19.56041717529297, 90.61348724365234, -14.565216064453125, 14.126739501953125, 24.5301513671875, 13.955047607421875, 29.828100204467773, 72.61648559570312, 33.852195739746094, -9.398284912109375, 47.88612365722656, -9.345268249511719, 74.8224868774414, 36.787315368652344, 87.15936279296875, 43.303436279296875, 22.063365936279297, 44.688507080078125, 44.81022644042969, 47.572601318359375, -5.913753509521484, 38.69279861450195, 29.979480743408203, 20.432815551757812, 76.09368896484375, 81.75843048095703, 79.16168212890625, -27.566497802734375, 10.90542984008789, -0.7616062164306641, 6.361249923706055, 34.78767395019531, 49.51648712158203, 17.16912841796875, 78.0526123046875, 79.1180419921875, -5.7021484375, 44.6566047668457, 51.26496124267578, 2.3733367919921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000300.npy"}
|
|
{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 31.020545959472656, "std": 25.941102981567383, "min": -6.433019638061523, "p10": 1.8240493774414066, "median": 27.818946838378906, "p90": 61.87804260253906, "max": 105.52278900146484, "pos_frac": 0.921875, "sample": [14.442977905273438, 56.309478759765625, 23.758583068847656, 71.47051239013672, 42.81208801269531, 34.769317626953125, 86.7696533203125, 19.29572296142578, 41.17101287841797, -6.433019638061523, 59.0496826171875, 61.886383056640625, 43.4080810546875, 29.01318359375, 19.88379669189453, 32.185508728027344, 24.67249298095703, 53.88414001464844, 1.6886749267578125, 31.751480102539062, 105.52278900146484, 0.4216804504394531, 5.507017135620117, 3.8507843017578125, 26.246597290039062, 33.71820068359375, 25.99475860595703, 9.307846069335938, 48.7755126953125, 48.26140594482422, 61.85858154296875, 15.189987182617188, 5.968639373779297, 36.032867431640625, 13.18598747253418, 11.559942245483398, 21.15357208251953, 29.305503845214844, 5.730371475219727, 2.139923095703125, 35.98582458496094, 4.144899368286133, 8.009864807128906, 26.8212890625, 18.228111267089844, 81.176513671875, 34.607933044433594, 28.816604614257812, 58.80279541015625, -6.156318664550781, 73.90831756591797, 54.67992401123047, 43.61678695678711, 32.32378387451172, 48.62139892578125, 22.22611427307129, -3.0548782348632812, -1.8812179565429688, 102.598388671875, 15.987495422363281, -1.2042694091796875, 8.921028137207031, 5.354129791259766, 41.258628845214844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000301.npy"}
|
|
{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 29.15094566345215, "std": 29.601024627685547, "min": -25.776611328125, "p10": -2.318616485595701, "median": 23.703075408935547, "p90": 65.9661346435547, "max": 134.09747314453125, "pos_frac": 0.875, "sample": [22.360485076904297, 51.32550048828125, 53.563018798828125, 14.807971954345703, 3.684356689453125, 26.229036331176758, 77.89585876464844, 6.005378723144531, 11.839553833007812, 64.70065307617188, 23.226409912109375, 12.966211318969727, 7.615989685058594, -0.18387603759765625, 19.181793212890625, -6.543308258056641, 51.798065185546875, 1.1622276306152344, 50.01708221435547, 42.71928405761719, 10.278045654296875, 32.98939514160156, 36.590171813964844, 29.003456115722656, -9.489952087402344, -25.776611328125, 34.66673278808594, 66.50848388671875, 23.203798294067383, -17.523887634277344, 33.956581115722656, 24.17974090576172, 52.89789581298828, 5.80963134765625, 11.502857208251953, 98.77389526367188, 134.09747314453125, 20.416427612304688, 40.3216552734375, 20.488067626953125, -3.2335052490234375, 82.92277526855469, -15.126617431640625, 49.176666259765625, 31.170372009277344, 45.83287048339844, 9.58419418334961, 92.54571533203125, 10.727832794189453, 18.027542114257812, 24.98740577697754, 38.24488830566406, 16.301185607910156, 24.6669921875, 24.981204986572266, 29.866073608398438, 7.566444396972656, 64.3162612915039, -4.483207702636719, 86.1319580078125, 18.148681640625, 6.6305084228515625, 27.320026397705078, 22.088701248168945], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000302.npy"}
|
|
{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 28.024972915649414, "std": 28.981969833374023, "min": -20.75140380859375, "p10": -6.834532737731931, "median": 22.473968505859375, "p90": 74.78558654785157, "max": 110.96018981933594, "pos_frac": 0.84375, "sample": [38.60797119140625, 1.6645851135253906, 1.0915756225585938, 79.94833374023438, -9.965784072875977, 22.355228424072266, 51.35832214355469, 82.13778686523438, 1.1594047546386719, 2.8379898071289062, 49.674957275390625, 5.701934814453125, 51.13279724121094, 8.437667846679688, 22.592708587646484, 8.333736419677734, 10.103233337402344, 16.421228408813477, -0.9557323455810547, 51.489593505859375, -20.75140380859375, 26.94344711303711, 15.560226440429688, 27.84991455078125, 58.28864669799805, 73.61796569824219, -4.002992630004883, 44.78886795043945, -3.5442123413085938, 19.845420837402344, -8.048049926757812, 37.24702453613281, 82.35858154296875, 19.243886947631836, 61.31201171875, 7.776031494140625, -19.656349182128906, 36.834014892578125, 41.36358642578125, 110.96018981933594, 70.66751098632812, 17.482772827148438, 31.790855407714844, 77.73860931396484, 13.17401123046875, 27.29400634765625, 36.397010803222656, 80.43003845214844, 18.82524871826172, -10.414897918701172, 32.866729736328125, 13.801910400390625, 25.952835083007812, 12.473743438720703, 25.91399574279785, 75.28599548339844, -9.26547622680664, -9.524429321289062, 21.520244598388672, 3.6182994842529297, 34.861572265625, 40.020538330078125, 14.947212219238281, 45.62553405761719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000303.npy"}
|
|
{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 28.358028411865234, "std": 20.787368774414062, "min": -28.421630859375, "p10": 2.204212951660157, "median": 29.306751251220703, "p90": 58.225981140136724, "max": 63.08823013305664, "pos_frac": 0.90625, "sample": [-2.1608943939208984, 16.059341430664062, 36.79607391357422, 28.69585418701172, 1.9175567626953125, 12.306526184082031, 12.5426025390625, -11.324371337890625, 29.199195861816406, 61.82666015625, 29.414306640625, 57.45088195800781, 17.350624084472656, 54.52843475341797, 34.21797561645508, 35.99137878417969, 14.396949768066406, 58.55816650390625, 16.315773010253906, 17.880481719970703, 40.575416564941406, 60.97520446777344, 25.689727783203125, 18.45068359375, 50.91389465332031, 49.62862014770508, 63.08823013305664, 7.638631820678711, 35.04579162597656, 25.718700408935547, 22.369001388549805, 40.196868896484375, 23.952674865722656, 40.511268615722656, 11.259429931640625, -0.9953155517578125, 32.003082275390625, 11.372543334960938, 30.93030548095703, 31.516666412353516, -28.421630859375, 40.210227966308594, 19.12841796875, 2.873077392578125, 42.93415069580078, -4.989377975463867, 3.34429931640625, 11.748638153076172, 62.721229553222656, 31.814491271972656, 37.0115966796875, -12.627660751342773, 15.14410400390625, 58.5582275390625, 54.654335021972656, 10.269073486328125, 59.461891174316406, 26.639915466308594, 54.857940673828125, 29.111175537109375, 37.93150329589844, 50.4117431640625, 34.78776550292969, 34.53376770019531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000304.npy"}
|
|
{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 28.763023376464844, "std": 26.594858169555664, "min": -8.2049560546875, "p10": 2.0166198730468756, "median": 22.559646606445312, "p90": 59.107295227050784, "max": 141.55838012695312, "pos_frac": 0.9375, "sample": [46.4051513671875, 45.969703674316406, 19.119129180908203, 28.92672348022461, 36.68791580200195, 53.7015380859375, 37.46549987792969, 21.14620018005371, 44.5924072265625, 79.33416748046875, -1.0234756469726562, 11.310127258300781, 30.32530975341797, 16.589508056640625, 28.57964324951172, 56.211395263671875, 34.086334228515625, 52.68321990966797, 43.17139434814453, 5.929450988769531, 141.55838012695312, 50.012451171875, 23.989017486572266, 17.16907501220703, 66.74371337890625, 8.41700553894043, 16.09552574157715, 28.812515258789062, 49.993858337402344, 13.142745971679688, 8.280609130859375, 65.62576293945312, 3.8329925537109375, 19.870168685913086, 33.3248291015625, 3.9044189453125, 59.06744384765625, 25.223068237304688, 17.570703506469727, 13.694847106933594, 12.067113876342773, 0.5666580200195312, 23.02783203125, 7.330604553222656, 50.86962890625, 59.12437438964844, 98.41322326660156, 2.5884246826171875, -2.5735549926757812, 25.856735229492188, 15.05853271484375, 62.00867462158203, 35.612030029296875, 18.074050903320312, -8.2049560546875, 0.085968017578125, 12.615104675292969, -3.3809814453125, 22.091461181640625, 1.7715606689453125, 3.8770980834960938, 37.150482177734375, 3.6214599609375, 5.641382217407227], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000305.npy"}
|
|
{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 27.875524520874023, "std": 23.330718994140625, "min": -33.739402770996094, "p10": 3.3302150726318365, "median": 26.619155883789062, "p90": 61.411904907226564, "max": 75.01348876953125, "pos_frac": 0.921875, "sample": [12.371959686279297, -10.141162872314453, 24.720073699951172, 17.77557373046875, -12.7041015625, 11.747726440429688, 67.87069702148438, 27.300933837890625, 9.9638671875, 18.114601135253906, 42.51029968261719, 47.688941955566406, -5.174125671386719, 14.44891357421875, 31.904212951660156, 9.755699157714844, 17.46493148803711, 55.86084747314453, 50.25428771972656, 13.719390869140625, 5.5173187255859375, 34.743125915527344, -3.400115966796875, 32.830116271972656, 25.9373779296875, 11.955366134643555, 11.23513412475586, 4.547176361083984, 33.292579650878906, 67.79154968261719, 7.7909698486328125, 1.1313362121582031, 61.68994140625, 34.023406982421875, 12.637947082519531, 56.586341857910156, 6.11945915222168, 3.9508514404296875, 25.338205337524414, 35.593223571777344, -33.739402770996094, 73.02885437011719, 34.547760009765625, 6.4836883544921875, 3.064228057861328, 41.64197540283203, 13.736213684082031, 42.84180450439453, 60.763153076171875, 67.53675842285156, 52.937835693359375, 48.52122497558594, 28.009319305419922, 31.053003311157227, 28.0289306640625, 13.494464874267578, 25.27771759033203, 17.59259033203125, 36.62446594238281, 56.57235336303711, 75.01348876953125, 53.572235107421875, 30.328781127929688, 62.33715057373047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000306.npy"}
|
|
{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 25.549152374267578, "std": 24.11737060546875, "min": -12.283903121948242, "p10": -0.2011135101318357, "median": 21.4307918548584, "p90": 54.73014450073242, "max": 98.60552978515625, "pos_frac": 0.890625, "sample": [49.3238525390625, 13.175361633300781, 5.9891204833984375, 17.875045776367188, 22.64993667602539, 38.72811508178711, -11.00836181640625, 4.2374420166015625, 6.1958465576171875, 94.39227294921875, 22.66642189025879, 14.675209045410156, 58.00956344604492, 11.209770202636719, 50.733551025390625, -10.874687194824219, 54.41838836669922, 3.9272842407226562, 40.566314697265625, -6.3669281005859375, 21.19892120361328, 18.08367156982422, 21.422775268554688, 54.86375427246094, 98.60552978515625, 21.573959350585938, 21.43880844116211, 10.165044784545898, 40.70869827270508, 37.985687255859375, 9.590730667114258, 46.26115417480469, 17.344297409057617, 18.70014190673828, 0.0183868408203125, 12.722713470458984, 23.654495239257812, 7.396736145019531, 59.03895568847656, 33.94615173339844, 21.57074737548828, 36.167694091796875, -0.2951850891113281, 24.319580078125, 38.85499572753906, 15.687156677246094, 39.20606994628906, 46.15900421142578, 0.7220745086669922, 22.68468475341797, 40.87384796142578, 0.4033927917480469, 68.60494995117188, 28.44025421142578, 52.131927490234375, 83.70606994628906, -9.954185485839844, -2.8973388671875, 15.630516052246094, 15.714202880859375, 21.687705993652344, 16.614654541015625, 16.152633666992188, -12.283903121948242], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000307.npy"}
|
|
{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 28.892669677734375, "std": 26.63459014892578, "min": -25.335601806640625, "p10": 2.3698194503784196, "median": 24.25887680053711, "p90": 59.121273040771484, "max": 107.04135131835938, "pos_frac": 0.921875, "sample": [40.16171646118164, 68.34378051757812, 4.141025543212891, 26.13103485107422, 11.074577331542969, 88.77017211914062, -25.335601806640625, 17.617034912109375, 57.654876708984375, 7.056310653686523, 38.78302001953125, 20.011905670166016, 16.416027069091797, 17.42414093017578, 50.81072998046875, 59.350830078125, -18.308101654052734, 38.71882629394531, 10.168853759765625, -1.2668991088867188, 10.850341796875, 107.04135131835938, 42.55059814453125, 71.11024475097656, 13.872451782226562, 26.980697631835938, 21.76769256591797, 0.13568687438964844, 35.184574127197266, 27.119600296020508, 31.946868896484375, 23.62749481201172, 30.226394653320312, 39.41326141357422, 36.72636413574219, 55.144500732421875, 37.91107177734375, 13.132621765136719, 24.8902587890625, 8.074151992797852, 11.83659553527832, 54.90739440917969, 18.496002197265625, 34.168243408203125, 27.79478645324707, 104.23809814453125, 18.179649353027344, 26.990455627441406, 10.285850524902344, 52.7344970703125, 85.38011932373047, 50.21751403808594, 6.951835632324219, 17.563583374023438, -7.805532455444336, 1.6107311248779297, 58.58563995361328, 44.46343231201172, 14.256149291992188, -3.0261688232421875, 12.746200561523438, 9.696701049804688, 8.276798248291016, 5.151756286621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000308.npy"}
|
|
{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 28.214529037475586, "std": 24.55332374572754, "min": -16.1966552734375, "p10": -1.191508865356445, "median": 26.839466094970703, "p90": 60.88320617675782, "max": 96.36068725585938, "pos_frac": 0.859375, "sample": [1.8738899230957031, 13.008724212646484, 7.969018936157227, -12.125396728515625, 17.897300720214844, 53.950927734375, 22.369970321655273, -0.24370193481445312, 50.78529357910156, 23.3631591796875, 52.93495178222656, 40.249176025390625, 23.195301055908203, 64.36677551269531, 15.635528564453125, 12.780447006225586, -10.487789154052734, 60.487213134765625, 51.448883056640625, 38.941795349121094, 39.395896911621094, -16.1966552734375, 43.991188049316406, -3.7753143310546875, 33.362091064453125, 7.5409698486328125, 54.2818717956543, 32.88995361328125, -0.899200439453125, -1.3167839050292969, 0.8802566528320312, 63.85858154296875, 29.024215698242188, 4.4117279052734375, 32.05254364013672, 61.117401123046875, 96.36068725585938, 27.84844207763672, 20.081148147583008, -2.041351318359375, 5.546024322509766, 7.312963485717773, 16.072711944580078, 76.28123474121094, 13.360870361328125, 32.746910095214844, 61.05291748046875, 8.017974853515625, 45.255043029785156, 11.302745819091797, 56.05132293701172, -1.4135513305664062, 34.64064025878906, 56.934326171875, 6.763917922973633, 27.917842864990234, 36.10658264160156, 85.86305236816406, 22.735733032226562, 32.38488006591797, 34.72856140136719, 23.641584396362305, 35.32598876953125, 25.830490112304688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000309.npy"}
|
|
{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 24.480121612548828, "std": 20.728391647338867, "min": -4.174957275390625, "p10": 3.291192626953125, "median": 18.018381118774414, "p90": 56.42516708374025, "max": 81.45247650146484, "pos_frac": 0.953125, "sample": [-2.370098114013672, 26.74712371826172, 16.526084899902344, 17.45014190673828, 32.81314468383789, 49.213294982910156, 47.39674758911133, 8.645380020141602, 6.88279914855957, 28.31024169921875, 0.2938957214355469, 30.08026123046875, 37.21336364746094, 18.586620330810547, 3.9571094512939453, 4.254646301269531, 19.668298721313477, 41.0228271484375, 13.081886291503906, 44.9711799621582, 23.134357452392578, -2.069681167602539, 78.8306884765625, 3.285369873046875, 9.2740478515625, 39.50617980957031, 53.714691162109375, 23.731914520263672, 25.054758071899414, 15.304039001464844, 8.99945068359375, 22.832115173339844, 8.026782989501953, 3.2717018127441406, 71.40925598144531, 15.754009246826172, 26.018142700195312, 11.824193954467773, 10.768798828125, -4.174957275390625, 16.241973876953125, 27.140045166015625, 48.16236877441406, 58.47410583496094, 17.42645263671875, 62.16136169433594, 81.45247650146484, 0.25743865966796875, 58.88336181640625, 3.304779052734375, 19.618080139160156, 7.012044906616211, 15.633285522460938, 17.057899475097656, 12.41839599609375, 4.4396514892578125, 3.420166015625, 39.01311492919922, 48.22784423828125, 13.462757110595703, 9.231193542480469, 32.55516815185547, 57.58679962158203, 24.3062801361084], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000310.npy"}
|
|
{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 34.34797286987305, "std": 25.421321868896484, "min": -20.857208251953125, "p10": 5.890789985656738, "median": 30.237972259521484, "p90": 72.03687438964845, "max": 103.04837036132812, "pos_frac": 0.96875, "sample": [21.84406280517578, 54.49542236328125, 60.832237243652344, 21.93341827392578, 1.4692020416259766, 24.17034912109375, 13.413280487060547, 47.70599365234375, 72.8861083984375, 58.83903503417969, 44.74891662597656, 13.998626708984375, 44.057918548583984, 80.66146850585938, 10.906436920166016, 13.959381103515625, 37.956298828125, 23.299463272094727, 21.258636474609375, 59.495208740234375, 32.979644775390625, 59.527198791503906, 70.05532836914062, 22.84215545654297, 9.374076843261719, 35.30080032348633, 31.40240478515625, 15.977157592773438, 30.629066467285156, 52.00702667236328, 9.252761840820312, 32.07847213745117, 26.7469482421875, 51.51478576660156, 55.1357421875, -20.857208251953125, 6.0781707763671875, 103.04837036132812, -2.5849685668945312, 47.79966735839844, 15.554351806640625, 12.688423156738281, 50.49293518066406, 24.951189041137695, 74.03489685058594, 29.700042724609375, 2.332500457763672, 5.810483932495117, 50.77480697631836, 51.31095886230469, 4.725856781005859, 81.14859008789062, 28.798751831054688, 3.0349960327148438, 13.446502685546875, 86.86341857910156, 38.508949279785156, 29.846878051757812, 9.462318420410156, 9.821556091308594, 48.566650390625, 49.03948974609375, 6.448600769042969, 74.6680679321289], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000311.npy"}
|
|
{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 29.852706909179688, "std": 26.55569839477539, "min": -13.208303451538086, "p10": -3.887220764160156, "median": 27.987163543701172, "p90": 61.9832332611084, "max": 104.31178283691406, "pos_frac": 0.84375, "sample": [24.432857513427734, 92.80812072753906, 60.294891357421875, 29.174545288085938, 29.653398513793945, 11.992408752441406, 24.232967376708984, 6.713933944702148, 43.57597351074219, 61.208473205566406, -9.498716354370117, 98.43940734863281, 93.9454345703125, 53.66712951660156, 50.425323486328125, 20.313079833984375, -9.406423568725586, 14.782821655273438, 46.46839904785156, 70.01056671142578, 22.15833282470703, 13.280899047851562, 34.50608825683594, 35.03790283203125, 22.957305908203125, -13.208303451538086, 31.83612823486328, 52.8565673828125, 32.793296813964844, 16.773086547851562, 15.243721008300781, -3.957427978515625, 34.487205505371094, 23.038299560546875, 14.093772888183594, 22.220535278320312, 35.37810516357422, 62.31527328491211, 29.461669921875, 36.15615463256836, 65.5171127319336, 9.711271286010742, -4.010869979858398, 17.537151336669922, 30.40557289123535, 29.19062042236328, -6.542713165283203, 104.31178283691406, -3.7234039306640625, 48.9134521484375, 15.47861099243164, -3.2704010009765625, 12.54412841796875, 10.809253692626953, 10.556640625, 44.36811828613281, 42.854915618896484, -1.477762222290039, -7.27276611328125, 27.77273941040039, 52.31461715698242, 16.228622436523438, 28.201587677001953, 39.49172592163086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000312.npy"}
|
|
{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 25.848873138427734, "std": 26.925546646118164, "min": -26.848411560058594, "p10": -3.0759815216064452, "median": 20.964481353759766, "p90": 64.723885345459, "max": 94.69051361083984, "pos_frac": 0.84375, "sample": [43.433746337890625, 26.28232192993164, -1.0288162231445312, 30.998672485351562, 22.17395782470703, 27.080703735351562, 53.242061614990234, 44.51271057128906, 61.56652069091797, 9.628791809082031, 7.672473907470703, -0.92803955078125, 9.1312255859375, 15.36224365234375, 5.037021636962891, -3.8889541625976562, 23.821273803710938, 38.123512268066406, 22.81167984008789, 31.71197509765625, 25.45691680908203, 66.07704162597656, -26.848411560058594, 1.8635425567626953, 17.301681518554688, 69.11832427978516, 35.4950065612793, 10.612689971923828, 21.595481872558594, 88.24822998046875, 49.619361877441406, 38.665496826171875, 0.6450099945068359, 25.280258178710938, -7.813575744628906, 4.311017990112305, -2.8887062072753906, -7.542732238769531, 17.145418167114258, 44.656005859375, 1.2038192749023438, 15.867927551269531, 8.853580474853516, 20.333480834960938, 79.57989501953125, 33.776920318603516, -4.185600280761719, 45.683677673339844, 78.23881530761719, -3.1562423706054688, 60.78941345214844, 36.784889221191406, 14.0399169921875, 2.1119651794433594, -7.864034652709961, 89.41049194335938, 45.62042236328125, 10.949516296386719, 0.7130393981933594, 15.578643798828125, 10.837333679199219, 52.662940979003906, 14.063362121582031, 94.69051361083984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000313.npy"}
|
|
{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 25.937664031982422, "std": 25.701616287231445, "min": -14.2586669921875, "p10": -1.3560607910156244, "median": 19.082908630371094, "p90": 56.271993255615236, "max": 108.0374526977539, "pos_frac": 0.859375, "sample": [90.92593383789062, -12.970504760742188, 7.724567413330078, 0.8988037109375, 2.7501373291015625, 68.49433135986328, 12.380241394042969, 37.03767395019531, 9.328376770019531, 10.011688232421875, 47.846683502197266, -1.6055145263671875, 15.522529602050781, 32.74699401855469, 20.04317855834961, -14.2586669921875, 0.2081451416015625, 32.11212921142578, 55.36772155761719, 77.44305419921875, 13.186660766601562, -2.333017349243164, 5.4530487060546875, 7.53118896484375, 19.131332397460938, -5.512920379638672, 2.5932159423828125, 7.098682403564453, 14.605415344238281, 56.250572204589844, 56.35693359375, 13.220252990722656, 44.5299072265625, 49.53173828125, 46.663909912109375, -2.9180564880371094, 25.294158935546875, 57.655845642089844, 55.21788024902344, 25.141510009765625, 39.57695770263672, 53.73809051513672, 108.0374526977539, 42.588951110839844, 8.73440170288086, 16.784263610839844, 15.51641845703125, 45.49110412597656, 0.349029541015625, 10.293384552001953, 28.037200927734375, -0.7740020751953125, 47.563720703125, 46.343013763427734, 49.76231384277344, 19.255172729492188, 12.114595413208008, -9.509376525878906, 34.36285400390625, 12.439430236816406, 23.36003875732422, -0.0760040283203125, 56.28117370605469, 19.03448486328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000314.npy"}
|
|
{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 24.5708065032959, "std": 22.940462112426758, "min": -10.803165435791016, "p10": 2.612305450439453, "median": 20.272245407104492, "p90": 57.58579978942871, "max": 107.55924987792969, "pos_frac": 0.90625, "sample": [42.790496826171875, 24.402677536010742, 16.397323608398438, 16.008331298828125, -1.3838062286376953, 29.732559204101562, 4.791341781616211, 18.13226318359375, -0.6799774169921875, 2.6231307983398438, 42.889251708984375, -1.152547836303711, -10.803165435791016, 16.172393798828125, 25.22570037841797, 63.123687744140625, 5.401653289794922, 29.14960289001465, 3.5836563110351562, 28.074310302734375, 25.984375, 59.594703674316406, 16.399024963378906, 2.8324508666992188, 19.785675048828125, 13.02730941772461, 53.64527893066406, 11.09620475769043, 58.00605773925781, -7.967998504638672, 78.2247314453125, 22.474288940429688, 2.607666015625, 20.75881576538086, 30.839305877685547, 53.563629150390625, 24.966461181640625, 46.713653564453125, 58.092002868652344, 6.863246917724609, 26.862396240234375, 30.703460693359375, 107.55924987792969, 10.692008972167969, 38.35943603515625, 14.109893798828125, -8.766807556152344, 3.949148178100586, 7.7919158935546875, 24.917694091796875, 12.88546371459961, 32.49661636352539, 83.7336196899414, 16.83971405029297, 22.320518493652344, 10.694948196411133, 56.60519790649414, 12.52935791015625, 34.11724853515625, 13.955429077148438, 20.851806640625, 14.768983840942383, 6.942331314086914, 25.626129150390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000315.npy"}
|
|
{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 29.87819480895996, "std": 26.384305953979492, "min": -14.510581970214844, "p10": 1.7542934417724616, "median": 24.184188842773438, "p90": 69.4810287475586, "max": 107.33357238769531, "pos_frac": 0.9375, "sample": [30.306228637695312, 15.21200942993164, 50.39225769042969, 21.096349716186523, 21.98113250732422, 67.79127502441406, 30.677146911621094, 21.68988800048828, 34.583984375, 36.18189239501953, 30.707130432128906, 37.88380432128906, 1.4422073364257812, 2.482494354248047, 19.13149642944336, 44.85307312011719, 45.10593032836914, 31.341449737548828, -2.974590301513672, 20.335845947265625, 8.763587951660156, 10.68526840209961, 105.70674896240234, 19.582351684570312, 70.70774841308594, 35.01176452636719, 16.742633819580078, 76.61592102050781, 33.5443115234375, 82.1336441040039, 0.34105873107910156, 10.413612365722656, 6.7230072021484375, 33.54690170288086, 21.082042694091797, 57.977325439453125, 21.36944580078125, 5.961631774902344, 69.94007873535156, -1.0226669311523438, 96.48037719726562, 26.387245178222656, 8.3270263671875, 45.70989990234375, 21.44183921813965, 30.731307983398438, 1.3688278198242188, 3.959453582763672, 107.33357238769531, 36.148399353027344, 68.409912109375, 18.876876831054688, 29.01045036315918, 17.197446823120117, 43.28068542480469, 3.6700897216796875, -3.7016220092773438, 26.978805541992188, 9.68243408203125, -14.510581970214844, 30.571212768554688, 7.197322845458984, 20.765037536621094, 30.84100341796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000316.npy"}
|
|
{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 33.848411560058594, "std": 25.116172790527344, "min": -9.27984619140625, "p10": 4.161865997314454, "median": 31.784591674804688, "p90": 67.57322387695314, "max": 110.87234497070312, "pos_frac": 0.90625, "sample": [47.08454132080078, 21.879920959472656, 25.936813354492188, 45.443603515625, 87.21343994140625, -9.27984619140625, 59.515785217285156, 47.64729309082031, -6.143218994140625, 22.48065948486328, 110.87234497070312, 24.297767639160156, 41.221099853515625, 37.865447998046875, 34.15589904785156, 49.54625701904297, 46.10319519042969, 22.980499267578125, 21.92884063720703, 29.084609985351562, 3.982086181640625, 31.753326416015625, 32.378013610839844, 72.92808532714844, 8.195018768310547, 77.2530517578125, -2.2520790100097656, 4.581352233886719, 64.28285217285156, 38.63140869140625, 43.49787902832031, 28.602798461914062, 30.043476104736328, 49.733741760253906, 54.192779541015625, 5.9284210205078125, 57.68592071533203, 31.81585693359375, 54.98976135253906, 27.511093139648438, 8.285308837890625, 69.0299301147461, -1.989084243774414, 13.638372421264648, 59.366455078125, 74.65392303466797, 54.223907470703125, -2.8540496826171875, 41.21381759643555, 8.05732536315918, 16.18770408630371, 47.93846893310547, 58.399688720703125, 32.48405456542969, 12.601066589355469, 14.004547119140625, -0.6075553894042969, 16.24443817138672, 48.877830505371094, 16.829544067382812, 8.711334228515625, 15.285507202148438, 11.162513732910156, 68.98338317871094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000317.npy"}
|
|
{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 30.52468490600586, "std": 25.276248931884766, "min": -15.318862915039062, "p10": -0.0843986511230449, "median": 29.37831974029541, "p90": 67.13362121582031, "max": 101.45472717285156, "pos_frac": 0.890625, "sample": [5.846014022827148, 3.6398468017578125, 82.98585510253906, 55.436279296875, 25.64202880859375, 17.252628326416016, 9.070411682128906, 34.885498046875, 28.314544677734375, 79.15789794921875, 3.1203384399414062, 36.70245361328125, 34.883209228515625, 30.882415771484375, 101.45472717285156, 37.9740104675293, -0.898529052734375, 35.083106994628906, 12.513015747070312, 29.68309211730957, 38.1195068359375, 19.026161193847656, 1.8152389526367188, 55.60626983642578, 71.71247100830078, 56.02276611328125, -1.912363052368164, 2.217395782470703, 30.37057876586914, 29.747798919677734, 20.995529174804688, 37.972591400146484, 32.01885986328125, 90.85189819335938, 16.16950225830078, 37.051631927490234, 52.42458724975586, -3.383686065673828, -15.318862915039062, 65.68103790283203, 71.17741394042969, 20.068870544433594, 8.834918975830078, 25.078147888183594, 56.52978515625, 20.594383239746094, 39.61304473876953, 31.429122924804688, -2.6162796020507812, 5.87030029296875, -2.5297393798828125, 17.355201721191406, 41.39070129394531, 44.133995056152344, 59.491153717041016, 20.44646453857422, 43.43653869628906, 14.06991958618164, 13.699363708496094, 29.07354736328125, -3.317474365234375, 67.75615692138672, 18.942054748535156, 12.234329223632812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000318.npy"}
|
|
{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 29.426651000976562, "std": 22.474348068237305, "min": -5.428596496582031, "p10": 3.2250631332397464, "median": 27.32065200805664, "p90": 59.21873931884767, "max": 88.7397232055664, "pos_frac": 0.9375, "sample": [29.130599975585938, 31.605770111083984, 23.207244873046875, 35.343994140625, 45.339134216308594, 25.567001342773438, 53.211395263671875, 14.440956115722656, 42.68336486816406, 17.927772521972656, 6.15814208984375, -3.989654541015625, 45.3232421875, 10.640510559082031, 1.5566482543945312, 23.4840087890625, 16.265472412109375, 44.32232666015625, 63.48253631591797, 29.04975128173828, 27.752365112304688, 27.784820556640625, 9.757797241210938, 37.26665496826172, 45.45460510253906, 3.430665969848633, 17.731468200683594, 60.27104187011719, 11.289794921875, 52.358741760253906, 7.380876541137695, 37.268856048583984, 18.333099365234375, 17.434799194335938, 3.1369476318359375, 21.50420379638672, 37.088783264160156, 23.949886322021484, -4.568794250488281, 28.16476058959961, 78.40472412109375, 88.7397232055664, 84.2524185180664, 18.302045822143555, 32.454742431640625, 4.60203742980957, 50.823524475097656, 36.34919738769531, 46.60293197631836, 69.46749877929688, 84.494384765625, 32.38988494873047, 8.050041198730469, 25.238037109375, -0.8217887878417969, 5.935863494873047, -5.428596496582031, 2.9181175231933594, 11.728553771972656, 26.888938903808594, 56.76336669921875, 23.411903381347656, 34.66651153564453, 29.53002166748047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000319.npy"}
|
|
{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 26.876949310302734, "std": 23.658065795898438, "min": -23.337310791015625, "p10": 5.013620758056641, "median": 20.95814323425293, "p90": 62.085929870605476, "max": 84.99072265625, "pos_frac": 0.9375, "sample": [13.85052490234375, 30.680660247802734, 64.92999267578125, 7.749351501464844, 53.27806854248047, 7.619544982910156, 50.93000793457031, 17.725135803222656, 80.35874938964844, 12.232368469238281, 68.64083099365234, 44.9659423828125, 1.815481185913086, 30.22784423828125, 44.650901794433594, 62.66282653808594, 24.167572021484375, 6.438079833984375, 12.158157348632812, 2.1563873291015625, 12.651542663574219, 51.336456298828125, 13.113504409790039, 12.159828186035156, 23.567115783691406, 20.430458068847656, 37.0772705078125, 14.566577911376953, -10.918922424316406, 24.8883056640625, -2.29425048828125, 44.13636016845703, 5.0120391845703125, 53.80465316772461, 43.91423034667969, 20.68453598022461, 5.78997802734375, 31.36962890625, 83.28573608398438, 15.73968505859375, 37.12305450439453, -5.327606201171875, 12.7069091796875, 6.295204162597656, 84.99072265625, 11.319847106933594, 39.14141845703125, 36.025238037109375, 8.042064666748047, 5.6092987060546875, 18.267929077148438, 16.2318115234375, 28.669815063476562, 70.3726577758789, 36.48542785644531, 45.97138977050781, 60.739837646484375, 6.00273323059082, 28.278427124023438, 5.017311096191406, 5.0442657470703125, 27.66936492919922, 21.23175048828125, -23.337310791015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000320.npy"}
|
|
{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 26.116397857666016, "std": 28.0751953125, "min": -26.505264282226562, "p10": -4.9040220260620115, "median": 23.710575103759766, "p90": 61.31600379943849, "max": 124.41720581054688, "pos_frac": 0.796875, "sample": [62.568267822265625, 33.425323486328125, 48.02311706542969, 28.032073974609375, 2.1521148681640625, 28.46318244934082, 63.732120513916016, -5.041309356689453, 7.473541259765625, 51.20222473144531, 44.20974349975586, 0.36588287353515625, -26.505264282226562, -4.583684921264648, -7.355171203613281, 30.238449096679688, 36.841094970703125, 40.55366516113281, 53.677886962890625, -1.611602783203125, 49.7799072265625, 4.095741271972656, -2.4409332275390625, 33.31596374511719, 49.519256591796875, 26.603477478027344, 30.544326782226562, -13.946226119995117, 20.873882293701172, 16.751510620117188, 19.165069580078125, 86.62210845947266, 5.860595703125, 13.160903930664062, 19.17021942138672, 77.86862182617188, -2.0831451416015625, -12.955965042114258, 124.41720581054688, 31.29737091064453, 4.279624938964844, -2.6043243408203125, 9.152273178100586, 25.51331329345703, 3.6036109924316406, 40.09281921386719, 6.201416015625, -8.661977767944336, 43.80772399902344, 34.562538146972656, 55.50763702392578, 64.8516845703125, 11.459039688110352, -1.1763153076171875, 20.83312225341797, 49.56452941894531, 41.85075378417969, 58.3940544128418, 11.29539680480957, 69.80935668945312, 16.599029541015625, 45.45688247680664, 21.9078369140625, -14.332122802734375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000321.npy"}
|
|
{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 31.63241958618164, "std": 27.988502502441406, "min": -5.576244354248047, "p10": 1.5635950088500983, "median": 23.287200927734375, "p90": 72.7325424194336, "max": 90.50723266601562, "pos_frac": 0.90625, "sample": [21.989459991455078, 34.6121826171875, 29.728614807128906, 71.908203125, 5.222261428833008, -5.576244354248047, 64.95968627929688, 46.43516540527344, 86.7396240234375, -4.3488922119140625, 1.2514419555664062, 13.269859313964844, 69.46458435058594, 34.14002990722656, 26.511749267578125, 47.86953353881836, 26.320106506347656, 44.93260192871094, 6.6472015380859375, 3.9591827392578125, 90.50723266601562, 31.043060302734375, 17.191261291503906, 65.72241973876953, -0.11495208740234375, 16.506568908691406, 67.54815673828125, 86.81554412841797, -4.408164978027344, 89.44744110107422, 11.683784484863281, 17.7149658203125, 41.744441986083984, 27.053184509277344, 23.761802673339844, 9.66494369506836, 13.926078796386719, 33.98450469970703, 7.6099700927734375, 48.88519287109375, 15.18603515625, 73.08583068847656, 57.35303497314453, 9.58255386352539, 52.6129150390625, 60.60911560058594, 18.431175231933594, -1.5795326232910156, 45.50677490234375, 9.547042846679688, 17.18780517578125, 88.73960876464844, 22.812599182128906, 11.659175872802734, 13.510276794433594, 23.824356079101562, 8.882583618164062, 20.919883728027344, 3.774759292602539, -4.918701171875, 79.50173950195312, 5.855583190917969, 67.77456665039062, 2.291952133178711], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000322.npy"}
|
|
{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 31.286056518554688, "std": 25.77242660522461, "min": -9.92477035522461, "p10": 2.7031930923461918, "median": 26.151376724243164, "p90": 65.30990753173829, "max": 112.2451171875, "pos_frac": 0.921875, "sample": [66.89202880859375, 50.697357177734375, 60.86676025390625, 10.821159362792969, 84.50501251220703, -9.477958679199219, 41.01545715332031, 52.24720764160156, 7.122570037841797, 8.985160827636719, 56.19587707519531, 9.145271301269531, 9.559776306152344, 44.894561767578125, 0.07949066162109375, 23.878860473632812, 25.22418975830078, 24.15992546081543, 54.14264678955078, 112.2451171875, -0.9130363464355469, 3.1314525604248047, 67.88204193115234, 45.31410217285156, 14.485248565673828, 68.95970916748047, 30.690553665161133, 24.369346618652344, 17.037704467773438, 17.516143798828125, 31.13509750366211, 48.03749084472656, 32.550872802734375, 24.128210067749023, 12.77239990234375, 2.5196533203125, 5.920989990234375, 11.011833190917969, 54.43315887451172, 43.89875793457031, 26.354206085205078, 18.965057373046875, 28.7010498046875, 85.50497436523438, -7.4646759033203125, 45.56944274902344, 55.764183044433594, 49.99135971069336, 25.94854736328125, 4.254386901855469, 59.45310974121094, 65.87178039550781, 7.66455078125, 37.4363899230957, -9.92477035522461, 30.553836822509766, 42.396141052246094, 29.473209381103516, 10.098518371582031, 63.998870849609375, 21.476882934570312, -7.707036972045898, 7.117527008056641, 22.727928161621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000323.npy"}
|
|
{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 29.687463760375977, "std": 26.02460479736328, "min": -12.558183670043945, "p10": 1.3223396301269537, "median": 24.903850555419922, "p90": 69.10287399291994, "max": 96.87274169921875, "pos_frac": 0.90625, "sample": [18.21978759765625, 25.021568298339844, 11.252330780029297, 31.990066528320312, 12.642816543579102, 16.6724853515625, 18.817893981933594, 8.501724243164062, 29.34575653076172, 23.54814338684082, 96.87274169921875, 2.4467926025390625, 78.3165283203125, 9.817161560058594, 37.47760009765625, 88.95919799804688, -12.01959228515625, 6.12664794921875, 19.083175659179688, 51.50991439819336, 9.343025207519531, 35.820526123046875, 1.8232536315917969, 38.03578567504883, 35.29804992675781, 63.7189826965332, 65.62660217285156, -7.7711639404296875, 15.243568420410156, 64.87338256835938, 70.59270477294922, 20.143966674804688, 25.868515014648438, 13.646514892578125, 45.72985076904297, 62.12467956542969, 35.02813720703125, 40.559112548828125, 14.331199645996094, 13.650808334350586, 22.565818786621094, -4.40496826171875, 5.752006530761719, 19.552047729492188, 51.316162109375, 24.7861328125, -4.342449188232422, -12.558183670043945, 43.36219024658203, 30.12921142578125, 45.287689208984375, 77.15605926513672, 33.908485412597656, 1.1076622009277344, 36.50022888183594, 44.816978454589844, 75.09001159667969, -1.5276927947998047, 4.148723602294922, 85.99288940429688, 28.540050506591797, 21.731765747070312, 4.0907135009765625, 28.705970764160156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000324.npy"}
|
|
{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 28.70180320739746, "std": 21.971115112304688, "min": -10.567436218261719, "p10": 0.583955764770509, "median": 28.38096809387207, "p90": 59.18320541381838, "max": 77.57283020019531, "pos_frac": 0.90625, "sample": [-3.9786758422851562, -1.27276611328125, 52.70220947265625, -4.192661285400391, 42.80693054199219, 75.89482879638672, 3.034088134765625, -10.567436218261719, 39.02400207519531, 63.64720916748047, 12.146476745605469, 54.45726013183594, 71.6639404296875, 15.489425659179688, 38.94517135620117, 24.39969825744629, 39.72735595703125, 26.500118255615234, 2.5250244140625, 46.9720458984375, 11.328758239746094, 66.43910217285156, 61.20861053466797, 63.13684844970703, 4.650547027587891, 0.07411575317382812, 33.37263870239258, 31.126794815063477, 25.269424438476562, 15.551597595214844, 54.02500534057617, 34.79930114746094, 45.64973449707031, 3.255725860595703, 44.950035095214844, 30.096908569335938, 48.9664306640625, 37.08894729614258, 25.87903594970703, 41.22419357299805, 7.0594024658203125, 1.7735824584960938, 45.039642333984375, 10.075170516967773, 39.217926025390625, 15.221151351928711, 14.613182067871094, 27.799617767333984, -2.0339508056640625, 31.029727935791016, 22.538116455078125, -3.301971435546875, 4.5549163818359375, 77.57283020019531, 20.875350952148438, 23.092185974121094, 36.722023010253906, 26.966096878051758, 40.399169921875, 28.962318420410156, 36.57502746582031, 3.9288330078125, 11.060386657714844, 49.15667724609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000325.npy"}
|
|
{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 25.774160385131836, "std": 24.158143997192383, "min": -11.115692138671875, "p10": -0.2894470214843746, "median": 19.549297332763672, "p90": 60.580932617187514, "max": 102.02278137207031, "pos_frac": 0.890625, "sample": [0.105712890625, 28.665470123291016, 5.8630523681640625, 66.91387176513672, 43.24119567871094, -0.45880126953125, -3.3421173095703125, 33.48416519165039, 42.690887451171875, -11.115692138671875, 26.729320526123047, 16.579814910888672, 102.02278137207031, -2.5951766967773438, -4.715642929077148, 13.68109130859375, 9.334163665771484, 15.3782958984375, 16.749969482421875, 57.758277893066406, -6.47125244140625, 0.7042999267578125, 61.79064178466797, 64.50244140625, 13.845474243164062, 8.69854736328125, 83.83413696289062, 53.19489669799805, 1.9328384399414062, 4.188793182373047, 24.184722900390625, 43.994361877441406, 51.43671417236328, 34.4117317199707, 18.072341918945312, -2.8148956298828125, 7.631557464599609, 19.59990692138672, 14.127574920654297, 49.410614013671875, 28.259796142578125, 21.68572998046875, 18.41927719116211, 19.498687744140625, 31.601402282714844, 13.108890533447266, 5.849403381347656, 19.610595703125, 9.276763916015625, 28.682044982910156, 2.5076217651367188, 72.92807006835938, 52.233978271484375, 21.8607177734375, 68.6996841430664, 32.747337341308594, 30.637008666992188, 10.71295166015625, 1.2849292755126953, 22.91015625, 49.884986877441406, 52.04469299316406, 16.656566619873047, 15.1988525390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000326.npy"}
|
|
{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 37.30348587036133, "std": 25.048036575317383, "min": -9.133087158203125, "p10": 7.901258468627931, "median": 35.78357696533203, "p90": 74.64009704589844, "max": 106.99537658691406, "pos_frac": 0.921875, "sample": [38.42254638671875, -0.5779476165771484, 75.62722778320312, 53.59609603881836, 15.929931640625, 50.17212677001953, -4.233650207519531, 45.58226013183594, 33.884857177734375, 47.85723114013672, 47.66827392578125, 50.51676940917969, 16.073638916015625, 52.83514404296875, 31.3699951171875, 25.27063751220703, 20.001102447509766, 66.04489135742188, 79.52882385253906, 50.86119842529297, -6.491783142089844, 46.17521667480469, 75.82475280761719, 50.996070861816406, 13.807079315185547, 66.67572021484375, 30.639171600341797, 7.693828582763672, 47.416831970214844, 57.351806640625, 59.31456756591797, -2.520660400390625, 33.863616943359375, 34.426513671875, 25.287277221679688, 28.688098907470703, 76.34831237792969, 42.27928161621094, 38.06126403808594, 37.53189468383789, 30.27570343017578, 5.804695129394531, 35.24415588378906, 83.57278442382812, 28.66046142578125, 8.787712097167969, 17.34756088256836, 36.322998046875, 72.3367919921875, -9.133087158203125, 59.892852783203125, 37.36044692993164, 106.99537658691406, 10.326927185058594, 48.23713684082031, 19.368032455444336, 27.334938049316406, 88.68534088134766, 18.820480346679688, 43.9422607421875, 26.287925720214844, 8.385261535644531, 13.67181396484375, 9.0946044921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000327.npy"}
|
|
{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 27.931251525878906, "std": 25.192350387573242, "min": -16.01385498046875, "p10": 2.4425888061523438, "median": 20.628297805786133, "p90": 62.71346588134766, "max": 106.76388549804688, "pos_frac": 0.921875, "sample": [37.78790283203125, -0.512176513671875, 6.659431457519531, 20.608631134033203, 17.500137329101562, 27.427490234375, 33.11896896362305, 24.180831909179688, 34.86561584472656, 5.645576477050781, 30.038633346557617, 14.884159088134766, 47.08160400390625, 73.71991729736328, -16.01385498046875, 35.67315673828125, -0.26081085205078125, 7.951484680175781, 5.410112380981445, 31.640625, -8.931346893310547, 6.884124755859375, 41.20105743408203, 62.799163818359375, 50.48406982421875, 62.51350402832031, 10.384017944335938, 5.120849609375, 43.326393127441406, 71.97222137451172, 5.871831893920898, 17.761260986328125, 53.72964096069336, 15.39156723022461, -2.309507369995117, 17.994888305664062, 20.647964477539062, 44.17900848388672, 27.639862060546875, 19.32049560546875, 2.5230445861816406, 51.86835479736328, 2.4081077575683594, 15.192073822021484, 3.6730823516845703, 83.4455337524414, 8.678674697875977, 17.862564086914062, 58.52909851074219, 36.04136657714844, 53.03900146484375, 106.76388549804688, 17.865142822265625, 15.27581787109375, 1.553497314453125, 72.06939697265625, 72.91578674316406, 8.179824829101562, 6.600776672363281, 25.165119171142578, 44.528045654296875, 55.156036376953125, 20.989294052124023, 3.8880767822265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000328.npy"}
|
|
{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 37.172855377197266, "std": 26.980588912963867, "min": -21.483566284179688, "p10": 8.542887115478518, "median": 36.54423141479492, "p90": 68.84908599853516, "max": 108.38369750976562, "pos_frac": 0.9375, "sample": [48.895355224609375, 71.8336181640625, 87.64993286132812, 46.426673889160156, 23.928787231445312, 17.58185386657715, 66.77281188964844, 24.10596466064453, 94.4532470703125, 16.314321517944336, 41.295379638671875, 23.918228149414062, 26.944087982177734, 2.4093704223632812, 18.85837173461914, 35.605247497558594, 46.94915771484375, 48.32469940185547, 9.857381820678711, 108.38369750976562, 17.059120178222656, 19.799774169921875, 33.00633239746094, 38.99884796142578, 1.132568359375, 10.090408325195312, 15.588668823242188, 86.44189453125, 52.846099853515625, 50.4141845703125, 49.327239990234375, 14.102672576904297, 65.68758392333984, 13.678068161010742, 37.48321533203125, 69.64303588867188, 58.40264892578125, 56.561187744140625, 61.14710998535156, 46.0290641784668, 16.627857208251953, 11.067188262939453, -17.822601318359375, 57.1536865234375, 58.89410400390625, 20.497501373291016, 58.93666076660156, -15.362106323242188, 20.785802841186523, 20.506038665771484, 7.979532241821289, -1.463705062866211, 34.83012390136719, 49.266845703125, 20.78197479248047, 27.58776092529297, 42.64237976074219, 66.99653625488281, 78.85594177246094, 31.076980590820312, -21.483566284179688, 45.330482482910156, 60.705909729003906, 46.72346496582031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000329.npy"}
|
|
{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 31.302356719970703, "std": 24.48064422607422, "min": -23.04400634765625, "p10": 7.983120346069337, "median": 23.03891944885254, "p90": 62.92135314941407, "max": 94.6649169921875, "pos_frac": 0.9375, "sample": [94.6649169921875, 16.5576171875, 0.7209320068359375, -12.697820663452148, 32.728271484375, 55.33805847167969, 20.064014434814453, 37.712493896484375, 14.048563003540039, 10.26654052734375, 9.426755905151367, 18.23907470703125, 34.56196594238281, 51.813201904296875, 76.53340148925781, 14.346542358398438, 27.465835571289062, 42.62922668457031, 36.387996673583984, 55.29847717285156, 15.351337432861328, 21.306686401367188, 63.58064270019531, 23.081974029541016, 24.742477416992188, 54.989315032958984, -23.04400634765625, 41.00721740722656, 28.40603256225586, 12.193181991577148, 48.60028076171875, 12.989921569824219, 10.457908630371094, 14.093894958496094, 59.921653747558594, 22.97760772705078, 21.370323181152344, 55.33348083496094, 16.14832305908203, 16.7932071685791, 7.432319641113281, 70.02716064453125, 15.789804458618164, 51.773590087890625, -8.899742126464844, 89.14852905273438, 51.57389831542969, 9.268321990966797, 42.46150207519531, 13.892618179321289, 34.425071716308594, 61.38301086425781, 22.995864868164062, 58.52777099609375, 16.32807159423828, 54.586856842041016, 21.699806213378906, 11.9993896484375, -1.10589599609375, 41.512733459472656, 68.074951171875, 18.292274475097656, 69.60205841064453, 6.153411865234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000330.npy"}
|
|
{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 34.531578063964844, "std": 23.45041847229004, "min": -5.5077056884765625, "p10": 9.373161315917969, "median": 29.82193374633789, "p90": 67.04142227172854, "max": 96.83233642578125, "pos_frac": 0.953125, "sample": [18.811119079589844, 46.677764892578125, 54.450660705566406, 10.337875366210938, 34.816650390625, 25.13812255859375, 36.772483825683594, -0.8995361328125, 9.109466552734375, 26.574325561523438, 61.83148193359375, 20.322738647460938, 96.83233642578125, 4.3282012939453125, 44.61857223510742, 3.7662887573242188, 91.78093719482422, 8.43375015258789, 35.273895263671875, 86.65234375, 60.75341033935547, 41.34004211425781, 88.24111938476562, 33.812259674072266, 23.602970123291016, -1.47955322265625, 29.43938446044922, 9.988449096679688, 42.513816833496094, 30.204483032226562, 26.825092315673828, 24.233623504638672, 42.342552185058594, 25.620838165283203, 41.23731994628906, 12.312370300292969, 21.616802215576172, 17.466293334960938, 39.838775634765625, 88.02542877197266, 28.135353088378906, 12.977394104003906, 17.912424087524414, 47.629974365234375, 25.566131591796875, 16.32836151123047, 34.77461242675781, 69.27425384521484, 14.0162353515625, 49.8577880859375, -5.5077056884765625, 36.36884689331055, 16.040563583374023, 34.898231506347656, 46.692604064941406, 27.862777709960938, 46.12492370605469, 14.808853149414062, 23.001998901367188, 43.05314636230469, 51.33403015136719, 40.54740905761719, 77.90763854980469, 26.8504638671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000331.npy"}
|
|
{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 25.641084671020508, "std": 22.47309684753418, "min": -28.82213592529297, "p10": 0.036081886291505105, "median": 21.479843139648438, "p90": 55.032700347900395, "max": 83.51962280273438, "pos_frac": 0.890625, "sample": [59.248016357421875, 14.961814880371094, 20.230224609375, 38.8377799987793, 17.073272705078125, 5.7343597412109375, 38.14122772216797, 80.82290649414062, 59.04539489746094, 45.58564758300781, 36.50249481201172, 2.0954647064208984, 2.2306060791015625, 83.51962280273438, 13.974161148071289, 12.945722579956055, 30.400516510009766, 40.86192321777344, -1.8877792358398438, -1.9047565460205078, 43.07716369628906, 33.77262878417969, 37.95896911621094, 34.70419692993164, 52.59593200683594, 32.032833099365234, -7.917510986328125, 1.4217605590820312, 33.392059326171875, 54.81462860107422, 31.27862548828125, 3.483959197998047, 12.4603271484375, 1.2454833984375, -3.2428436279296875, 17.696388244628906, 63.669464111328125, 63.9456787109375, 9.553899765014648, 37.144317626953125, 37.15229797363281, 19.66977310180664, 45.392547607421875, 45.206886291503906, 9.390735626220703, 55.12615966796875, 21.67279052734375, 22.071006774902344, 16.92633056640625, 46.69844055175781, 20.82320785522461, 19.903724670410156, 42.863807678222656, 21.58782958984375, 37.016597747802734, -0.48223304748535156, -9.296829223632812, 2.4715518951416016, 7.220514297485352, 21.371856689453125, 17.512527465820312, 3.5220718383789062, 14.523326873779297, -28.82213592529297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000332.npy"}
|
|
{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 32.79645919799805, "std": 29.007871627807617, "min": -22.53701400756836, "p10": -0.09001903533935518, "median": 32.828224182128906, "p90": 68.4666732788086, "max": 106.86128234863281, "pos_frac": 0.890625, "sample": [-0.7310028076171875, 16.20703887939453, 70.47061157226562, -1.110107421875, 11.082557678222656, 24.700836181640625, 40.7677001953125, 51.451873779296875, 3.4059219360351562, -0.2136993408203125, 68.70657348632812, 10.117643356323242, 70.45869445800781, -6.392082214355469, 16.290634155273438, 7.8360595703125, 56.39368438720703, 16.234769821166992, 78.64877319335938, 20.74700164794922, -22.53701400756836, 33.393943786621094, 2.7699432373046875, 60.87092590332031, 65.98231506347656, 38.315765380859375, 32.89875030517578, 16.742279052734375, 47.328887939453125, 74.60393524169922, 7.988929748535156, 5.500711441040039, 0.9219207763671875, 66.66165161132812, 49.920806884765625, 48.1329345703125, 2.635448455810547, 0.19856834411621094, 58.753807067871094, 55.915733337402344, 16.823707580566406, 64.50985717773438, 6.459098815917969, 41.186256408691406, 67.90690612792969, 9.405521392822266, 35.418556213378906, 32.75769805908203, 106.86128234863281, 55.11131286621094, 58.675254821777344, 51.83306121826172, 99.28328704833984, 18.8280029296875, 25.02862548828125, -11.704559326171875, 55.16729736328125, 18.35767364501953, 61.17152404785156, 47.31163024902344, 10.085357666015625, -17.072540283203125, 36.954254150390625, 6.5405731201171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000333.npy"}
|
|
{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 31.349750518798828, "std": 22.043792724609375, "min": -15.223129272460938, "p10": 5.0769683837890645, "median": 31.351470947265625, "p90": 54.745633697509774, "max": 90.50738525390625, "pos_frac": 0.9375, "sample": [81.58090209960938, 16.90283203125, -2.06475830078125, 53.27312469482422, 42.19007873535156, 11.272506713867188, 17.73877716064453, 10.635986328125, 46.34074020385742, 13.448928833007812, 10.120758056640625, 28.857990264892578, 34.02361297607422, 45.391876220703125, 25.13109588623047, 41.75868225097656, 19.850584030151367, 50.627349853515625, -15.223129272460938, 38.017608642578125, 8.468395233154297, 38.902095794677734, 21.484275817871094, -10.781290054321289, 57.91947937011719, 34.411529541015625, 47.77238464355469, 6.8357696533203125, 13.634525299072266, 4.3231964111328125, 2.98895263671875, 11.810260772705078, 20.699539184570312, 33.82594299316406, 55.376708984375, 50.821807861328125, 35.93749237060547, 18.796236038208008, 32.77867889404297, 39.18365478515625, 42.64794158935547, 12.862030029296875, 35.12060546875, 24.52099609375, 0.8150405883789062, 21.817943572998047, 79.20281982421875, 45.924713134765625, 85.04135131835938, 46.77857208251953, 40.46296691894531, 29.92426300048828, 24.78852081298828, 25.91911506652832, 26.597023010253906, 52.89092254638672, 44.840911865234375, 19.606403350830078, 20.004180908203125, 39.719032287597656, -3.2519893646240234, 90.50738525390625, 46.63603973388672, 57.94218444824219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000334.npy"}
|
|
{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 27.685340881347656, "std": 26.116199493408203, "min": -15.110403060913086, "p10": 0.09156951904296923, "median": 21.923943519592285, "p90": 55.192546844482436, "max": 128.14276123046875, "pos_frac": 0.890625, "sample": [43.94622802734375, 45.88527297973633, 18.1956787109375, 6.916755676269531, 48.65943145751953, 8.908498764038086, 18.05634880065918, 56.380455017089844, 44.501686096191406, -4.669267654418945, 2.2607059478759766, 33.881683349609375, 16.433937072753906, 35.757240295410156, 51.78135681152344, 85.05659484863281, 7.7684478759765625, 33.59577178955078, -11.69363021850586, 25.80933380126953, 19.927474975585938, -1.5896930694580078, 52.42076110839844, 45.841796875, 47.98186492919922, 49.074737548828125, 37.28512191772461, 33.93632507324219, 1.3817062377929688, 50.173309326171875, 128.14276123046875, -5.690010070800781, 44.26741409301758, 6.820075988769531, 57.10078811645508, 21.52836799621582, 77.10516357421875, 38.2518310546875, 14.536605834960938, 36.64434814453125, -0.1066741943359375, 75.24969482421875, 2.3141021728515625, 0.55413818359375, 8.105854034423828, 9.400039672851562, 40.36455154418945, 14.523422241210938, -15.110403060913086, 20.147003173828125, 19.82094383239746, 0.6287612915039062, 59.81715393066406, 22.31951904296875, 7.385496139526367, 22.958202362060547, 42.1748046875, 51.62549591064453, 2.28582763671875, -7.038127899169922, 16.7940731048584, 34.95953369140625, 11.997135162353516, 8.117862701416016], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000335.npy"}
|
|
{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 34.13089370727539, "std": 29.321842193603516, "min": -44.246299743652344, "p10": 3.0404552459716805, "median": 30.8962459564209, "p90": 72.7255630493164, "max": 129.95404052734375, "pos_frac": 0.90625, "sample": [15.839218139648438, 18.743408203125, 85.86990356445312, 49.19447326660156, 17.95134162902832, 50.403968811035156, 17.24530792236328, 32.14739227294922, -7.039157867431641, 107.1201171875, -6.736289978027344, 12.643310546875, 22.381752014160156, 30.77503204345703, 129.95404052734375, -2.694009780883789, 82.26664733886719, 21.80766487121582, 22.66826629638672, 41.290985107421875, -14.240169525146484, 36.23841857910156, 65.10798645019531, 22.38506317138672, 2.7483367919921875, 35.02490997314453, 30.976367950439453, 39.67531967163086, 45.86151123046875, 26.939315795898438, 23.8266544342041, 15.774200439453125, 30.816123962402344, 14.997276306152344, 27.73217010498047, 41.80708312988281, -10.507049560546875, 33.300811767578125, 14.40316390991211, 44.94319152832031, 28.290374755859375, 57.97577667236328, 55.59039306640625, 34.08428955078125, 73.36810302734375, 10.468772888183594, -44.246299743652344, 25.809036254882812, 34.90711212158203, 40.654693603515625, 22.643985748291016, 36.5944938659668, 71.22630310058594, 28.720678329467773, 14.779571533203125, 54.04386901855469, 49.970603942871094, 97.98678588867188, 3.722064971923828, 37.55096435546875, 4.797996520996094, 54.79191589355469, 76.13758850097656, 44.86407470703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000336.npy"}
|
|
{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 30.773639678955078, "std": 22.813125610351562, "min": -14.600936889648438, "p10": 4.215460968017579, "median": 30.99468231201172, "p90": 60.56119918823243, "max": 85.46808624267578, "pos_frac": 0.953125, "sample": [75.51119995117188, 80.53032684326172, -14.600936889648438, -2.851226806640625, 53.60649108886719, 33.10028076171875, 0.6034774780273438, 14.832687377929688, 6.430469512939453, 31.020751953125, 5.882568359375, 32.0220947265625, 32.184242248535156, 47.59339904785156, 61.62206268310547, 15.378005981445312, 54.441734313964844, 44.322208404541016, 5.384742736816406, 41.971580505371094, 9.51275634765625, 25.597530364990234, 21.126306533813477, 53.431793212890625, 27.55752182006836, 22.069580078125, 33.60481262207031, 10.742721557617188, 10.775276184082031, 19.162321090698242, 10.36349868774414, 29.685096740722656, 51.246116638183594, 1.0967864990234375, 18.50179672241211, 24.227203369140625, 30.968612670898438, -7.175376892089844, 32.234474182128906, 35.91551971435547, 14.782428741455078, 85.46808624267578, 6.25384521484375, 7.188743591308594, 9.197257995605469, 17.158252716064453, 44.67463684082031, 38.29203796386719, 54.74458694458008, 37.28843688964844, 46.52273941040039, 32.594261169433594, 56.43731689453125, 67.46510314941406, 22.506057739257812, 79.36520385742188, 41.10585021972656, 39.47120666503906, 3.7143402099609375, 59.445953369140625, 39.7034912109375, 24.187484741210938, 1.2759437561035156, 61.039161682128906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000337.npy"}
|
|
{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 30.744945526123047, "std": 24.816186904907227, "min": -4.947551727294922, "p10": 3.4065391540527354, "median": 27.370797157287598, "p90": 72.5469841003418, "max": 80.85774230957031, "pos_frac": 0.9375, "sample": [56.12481689453125, 73.89776611328125, 3.0072479248046875, 4.441009521484375, 35.602203369140625, 5.8609161376953125, 7.124265670776367, 39.82948303222656, 73.66397857666016, 27.528472900390625, 4.338218688964844, 11.508171081542969, 13.339315414428711, 72.15985107421875, -3.56280517578125, 9.096786499023438, 35.94060516357422, 30.053878784179688, 43.049007415771484, 67.60783386230469, 41.525146484375, 1.4753189086914062, 34.98577880859375, 30.577499389648438, 31.212020874023438, 28.916702270507812, 34.674652099609375, -4.947551727294922, 11.490997314453125, 22.324485778808594, 23.16659164428711, 69.39921569824219, 72.71289825439453, -4.232341766357422, 15.43792724609375, 20.658592224121094, 6.603675842285156, 17.101470947265625, 51.988502502441406, 20.94997215270996, 40.7359619140625, 17.158788681030273, 7.695442199707031, -1.3013839721679688, 79.1287841796875, 8.443277359008789, 30.71331024169922, 11.719963073730469, 11.470779418945312, 27.21312141418457, 10.32305908203125, 4.811456680297852, 52.45314025878906, 80.85774230957031, 41.32377624511719, 58.75956726074219, 75.01754760742188, 26.058826446533203, 57.16792297363281, 44.59137725830078, 80.3785400390625, 14.022880554199219, 2.969259262084961, 49.33085632324219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000338.npy"}
|
|
{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 34.47539520263672, "std": 26.293350219726562, "min": -12.351924896240234, "p10": 1.260022735595705, "median": 32.47867965698242, "p90": 68.70642700195313, "max": 107.76193237304688, "pos_frac": 0.921875, "sample": [41.51512908935547, 39.62919998168945, 20.672733306884766, -9.148927688598633, 71.48957824707031, 16.402267456054688, 42.73579406738281, 40.85849380493164, 71.63223266601562, 6.6774749755859375, 107.76193237304688, 13.379188537597656, 55.78302001953125, 16.238067626953125, 44.89314270019531, 65.81854248046875, 5.632871627807617, 11.825531005859375, 16.926536560058594, 54.034759521484375, 40.94068908691406, 11.314964294433594, 43.25151824951172, -1.2343215942382812, 32.97972869873047, 23.712291717529297, 54.920013427734375, 17.25043487548828, 78.12083435058594, 104.08171081542969, -0.9076747894287109, 50.25876235961914, 30.847023010253906, 0.5307388305664062, 103.36666107177734, 35.652801513671875, 31.288116455078125, 10.019994735717773, 27.081817626953125, 20.331649780273438, 15.788238525390625, 22.740638732910156, 31.977630615234375, 43.382568359375, 19.909072875976562, 52.267356872558594, 2.9616851806640625, 63.607421875, 15.125263214111328, -12.351924896240234, 43.494564056396484, 69.944091796875, 40.07721710205078, 38.303199768066406, 31.28152847290039, 39.15138244628906, 0.1298980712890625, -5.482719421386719, 38.79767608642578, 28.45184326171875, 52.38932800292969, 31.440948486328125, 37.66047668457031, 56.81261444091797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000339.npy"}
|
|
{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 33.67200469970703, "std": 30.476184844970703, "min": -7.860015869140625, "p10": 2.7437366485595702, "median": 30.224653244018555, "p90": 67.39425659179688, "max": 150.76434326171875, "pos_frac": 0.921875, "sample": [39.12395477294922, 69.21541595458984, 49.49346160888672, 26.57233428955078, 37.14707946777344, 27.593822479248047, 79.83393859863281, 2.6939239501953125, 40.88592529296875, 8.089715957641602, 55.603302001953125, 150.76434326171875, 47.2928466796875, 1.0789241790771484, 67.16664123535156, 22.138519287109375, 66.15032196044922, 36.357906341552734, 32.18274688720703, 10.363445281982422, 32.78070831298828, 28.266559600830078, -1.2809982299804688, 94.69819641113281, 3.456390380859375, 39.66534423828125, -7.088289260864258, 33.700340270996094, 6.281349182128906, 43.495262145996094, 57.890716552734375, 40.48200988769531, 136.77151489257812, 32.81349182128906, 3.5907554626464844, 16.90968132019043, 18.996212005615234, 36.207191467285156, 7.096733093261719, 25.015634536743164, 44.302818298339844, 20.136207580566406, 7.36895751953125, 25.456554412841797, -3.6977081298828125, 74.2906723022461, 10.832672119140625, 9.175743103027344, 65.8824234008789, 42.69215393066406, 10.625389099121094, 67.49180603027344, 51.15643310546875, 14.186691284179688, 24.00798797607422, 26.7125244140625, 19.050941467285156, 2.859966278076172, 42.470794677734375, 41.565093994140625, -1.6803741455078125, 40.30720901489258, -7.860015869140625, 8.175987243652344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000340.npy"}
|
|
{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 37.13330078125, "std": 26.232938766479492, "min": -10.299606323242188, "p10": 5.408286666870119, "median": 36.11475944519043, "p90": 71.16452026367188, "max": 92.7139892578125, "pos_frac": 0.9375, "sample": [92.7139892578125, -4.307342529296875, 9.139915466308594, 7.514341354370117, 20.001426696777344, 50.38719177246094, 40.146026611328125, 69.79020690917969, 41.41722869873047, 27.219562530517578, 25.576425552368164, 43.72526550292969, 64.91675567626953, 45.118812561035156, 25.052541732788086, 11.576980590820312, 14.539596557617188, 86.70924377441406, 41.78142547607422, 41.76458740234375, 6.482978820800781, 78.0778579711914, 65.56362915039062, 54.628265380859375, 22.96532440185547, 71.4757080078125, 42.10527801513672, 43.93983459472656, 32.05684280395508, -10.299606323242188, 7.1004180908203125, 70.43841552734375, 66.6041259765625, -1.69696044921875, 12.402393341064453, 51.51133728027344, 50.97016906738281, -0.5673027038574219, 3.4270801544189453, 18.890365600585938, 82.14773559570312, 39.70152282714844, 32.52799606323242, 11.46600341796875, 4.947704315185547, 28.290985107421875, 16.772903442382812, 69.69342803955078, 2.1358566284179688, 64.71253967285156, 17.7043514251709, 43.811424255371094, 31.452224731445312, 14.443389892578125, 59.93727111816406, 26.511756896972656, 87.69770812988281, 40.255889892578125, 61.32264709472656, 19.61929702758789, 55.626068115234375, 28.94428062438965, 82.28507232666016, 13.662803649902344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000341.npy"}
|
|
{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 32.50803756713867, "std": 29.156147003173828, "min": -8.837249755859375, "p10": -1.4624504089355463, "median": 30.59192657470703, "p90": 76.82030334472657, "max": 108.76419067382812, "pos_frac": 0.875, "sample": [74.43582153320312, 0.01617431640625, -4.164825439453125, 30.84954833984375, 29.502368927001953, 28.504905700683594, 61.96398162841797, 21.467636108398438, 11.57269287109375, 15.903678894042969, 10.037956237792969, 51.39080810546875, 43.75665283203125, 20.085044860839844, 106.10284423828125, 19.875755310058594, -0.8472213745117188, 34.98065185546875, 31.865371704101562, -1.78619384765625, 30.334304809570312, 28.494735717773438, 40.36289978027344, 4.255805969238281, -1.7261199951171875, 33.203521728515625, 96.74217987060547, 23.872344970703125, 19.64168357849121, 6.7394256591796875, 1.4596748352050781, 108.76419067382812, 43.729393005371094, 3.809558868408203, 0.5386962890625, -5.164131164550781, 79.62621307373047, 107.93912506103516, 66.89659118652344, 26.34156036376953, -3.8513545989990234, 38.93944549560547, 37.55046081542969, -8.837249755859375, 55.15924072265625, 77.84222412109375, 39.4415283203125, 43.123802185058594, 25.658395767211914, 48.42181396484375, 80.5037841796875, 16.518699645996094, 14.160629272460938, 31.74755859375, 34.79331588745117, 6.3022003173828125, 32.43110275268555, 32.49769592285156, 0.45829010009765625, 38.083465576171875, 45.63619613647461, 55.33943176269531, 43.750938415527344, -6.532524108886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000342.npy"}
|
|
{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 34.53588104248047, "std": 24.96697235107422, "min": -20.331771850585938, "p10": 4.863111114501954, "median": 35.161678314208984, "p90": 67.39685516357422, "max": 91.47411346435547, "pos_frac": 0.90625, "sample": [49.538177490234375, 13.336814880371094, 67.29560852050781, 22.69135284423828, 28.37757682800293, -1.833038330078125, 12.880146026611328, 53.446372985839844, 5.0273284912109375, 56.39418029785156, 5.7549896240234375, 46.86248779296875, 23.370281219482422, 26.51104736328125, 36.29548645019531, 14.703136444091797, 27.56647491455078, 44.28032684326172, 43.59058380126953, 70.10507202148438, 62.310550689697266, 80.73956298828125, 14.039703369140625, 62.56251525878906, 73.34791564941406, 34.37316131591797, 19.153776168823242, 9.733078002929688, 56.5767822265625, 45.218505859375, -20.331771850585938, 18.521717071533203, 72.00562286376953, -11.706741333007812, 28.26457977294922, 14.452972412109375, 50.16798400878906, 20.901742935180664, 6.109174728393555, 43.34632873535156, -3.692138671875, 73.12802124023438, 26.13066864013672, 22.089126586914062, 19.93238067626953, 67.44024658203125, 63.32826232910156, 43.12989044189453, 35.9501953125, 55.09223175048828, 20.783843994140625, 64.94073486328125, 45.42816925048828, 11.427314758300781, 37.27445983886719, 38.17057800292969, 36.75291442871094, 43.40203857421875, 29.040306091308594, 4.792732238769531, -2.1014938354492188, 66.0440673828125, 91.47411346435547, -5.6437530517578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000343.npy"}
|
|
{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 28.359664916992188, "std": 29.36441993713379, "min": -16.37274932861328, "p10": -1.169548034667968, "median": 23.09178924560547, "p90": 65.51976852416993, "max": 135.75808715820312, "pos_frac": 0.875, "sample": [7.255290985107422, 16.959875106811523, -16.37274932861328, -6.299335479736328, 17.642223358154297, 44.998451232910156, 19.28021812438965, 22.155197143554688, 0.6088409423828125, 17.392593383789062, 7.13115119934082, 47.904754638671875, -7.9870452880859375, 48.34611511230469, 8.823760986328125, 21.49378204345703, 42.36735534667969, 16.292694091796875, -0.454925537109375, 38.29228210449219, 0.42201995849609375, 13.641304016113281, 5.018640518188477, 26.17122459411621, 24.02838134765625, 28.503265380859375, 38.69801330566406, 63.02020263671875, 8.283285140991211, 6.627471923828125, 85.33880615234375, 26.481597900390625, 40.88062286376953, 3.9508304595947266, 20.838531494140625, 36.743896484375, 46.6226806640625, -12.07470703125, 57.61077880859375, 26.40448570251465, 95.89376831054688, 39.12602233886719, 28.119260787963867, 28.427146911621094, 1.9029426574707031, 135.75808715820312, 60.21517562866211, 44.12649154663086, 15.56273078918457, 79.11824035644531, 2.244314193725586, 1.972015380859375, 29.270397186279297, 86.6616439819336, 16.447418212890625, 29.357818603515625, -1.645944595336914, 34.704437255859375, 88.9355697631836, -15.784423828125, 40.446075439453125, 16.002363204956055, 66.59101104736328, -1.4758148193359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000344.npy"}
|
|
{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 34.33030319213867, "std": 26.923765182495117, "min": -30.57220458984375, "p10": 3.056636047363282, "median": 33.63469314575195, "p90": 67.07272720336915, "max": 131.99282836914062, "pos_frac": 0.9375, "sample": [3.7365875244140625, 131.99282836914062, 48.220829010009766, 31.12701416015625, 19.07537269592285, -5.286521911621094, 59.109375, 19.843223571777344, 35.22361373901367, 30.643753051757812, 1.9070606231689453, 41.44532775878906, 39.633453369140625, 70.71664428710938, 42.207496643066406, 38.022979736328125, -4.0167236328125, 40.66021728515625, 6.219501495361328, 30.595947265625, 2.765228271484375, 47.62736511230469, 18.078369140625, 22.13631820678711, 53.35839080810547, 39.97654724121094, 19.160465240478516, 64.92744445800781, 38.081459045410156, 34.14396667480469, 19.871856689453125, 67.99213409423828, 23.261688232421875, 52.10227966308594, 14.161834716796875, 45.84959411621094, 33.96803283691406, 71.4638671875, 22.030731201171875, 38.6005744934082, 21.92603302001953, 116.071533203125, 44.983951568603516, 11.544090270996094, 44.670440673828125, 42.76203918457031, 13.786331176757812, 69.34087371826172, 20.6221923828125, 27.524417877197266, 46.45252990722656, 2.1770095825195312, 22.712310791015625, 47.30335998535156, -7.500698089599609, 19.83765411376953, 6.747526168823242, 28.252334594726562, 50.14344787597656, 21.01184844970703, 48.05663299560547, 85.34835815429688, -30.57220458984375, 33.301353454589844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000345.npy"}
|
|
{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 30.47341537475586, "std": 26.77487564086914, "min": -21.124984741210938, "p10": -2.7227661132812453, "median": 23.638474464416504, "p90": 61.30450210571291, "max": 106.96741485595703, "pos_frac": 0.890625, "sample": [56.39148712158203, 7.4067840576171875, -9.817375183105469, 63.41007995605469, 91.983154296875, 12.491771697998047, 90.10153198242188, 23.802881240844727, 43.72449493408203, 106.96741485595703, -21.124984741210938, 16.680824279785156, 49.73808288574219, -14.780830383300781, 21.397682189941406, 36.22161865234375, 11.542167663574219, 51.49385070800781, 71.76568603515625, 9.776206970214844, 48.781707763671875, 13.6453857421875, -9.687286376953125, 53.422027587890625, 21.920639038085938, 41.0194206237793, 44.235374450683594, 15.002113342285156, 36.88550567626953, 18.2159423828125, 17.386995315551758, 10.282699584960938, 23.47406768798828, 21.509492874145508, 34.85918426513672, 44.62656784057617, 55.05525207519531, 12.924381256103516, 48.7358512878418, 14.104385375976562, 1.9009552001953125, 41.613006591796875, 36.43684387207031, 67.74055480957031, 22.21333122253418, 10.993751525878906, 52.275535583496094, 50.55234909057617, 10.738662719726562, 19.56032943725586, 38.34315490722656, 24.13113021850586, 54.60009765625, -7.49528694152832, 11.009706497192383, 45.30677795410156, 13.312929153442383, 17.15521812438965, 85.97672271728516, -4.7043609619140625, -4.8782196044921875, 3.6984100341796875, 28.712190628051758, 45.53257751464844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000346.npy"}
|
|
{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 31.062728881835938, "std": 27.517837524414062, "min": -29.5986328125, "p10": 1.6593750000000012, "median": 26.508434295654297, "p90": 66.43281860351563, "max": 130.65731811523438, "pos_frac": 0.921875, "sample": [1.1412506103515625, 42.19472122192383, 23.57647705078125, 59.19548416137695, 15.172172546386719, 34.249534606933594, 19.859115600585938, 29.452987670898438, 13.800323486328125, 30.866622924804688, 37.738555908203125, 12.719066619873047, 25.959365844726562, 13.50421142578125, 46.10436248779297, -29.5986328125, -11.954170227050781, 17.823272705078125, 70.04325866699219, 17.715938568115234, 34.32171630859375, -3.9884986877441406, 26.397842407226562, 63.34364318847656, 12.558685302734375, 40.61138153076172, 66.56866455078125, 57.17389678955078, 12.359447479248047, 26.121734619140625, 33.187156677246094, 21.222885131835938, 64.41776275634766, 48.48712921142578, 12.38827896118164, 11.084312438964844, 65.01947021484375, 42.42588806152344, 8.14453125, -18.06717300415039, 35.49639129638672, 74.0830078125, 2.8683319091796875, 56.76652908325195, 20.8043212890625, 29.34500503540039, 19.184677124023438, 39.477516174316406, 87.45736694335938, 12.332387924194336, 38.33140563964844, 66.1158447265625, 43.298099517822266, 130.65731811523438, 7.098791122436523, 33.828857421875, 16.839134216308594, -16.166046142578125, 69.55284881591797, 13.011455535888672, 70.09954071044922, 26.61902618408203, 17.052040100097656, 0.5180816650390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000347.npy"}
|
|
{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 30.16468048095703, "std": 26.948823928833008, "min": -18.96076011657715, "p10": -1.7261962890624982, "median": 31.315552711486816, "p90": 62.333581924438484, "max": 93.81663513183594, "pos_frac": 0.875, "sample": [60.96387481689453, 0.17043304443359375, 39.08612823486328, 54.234832763671875, 58.95293426513672, 14.849433898925781, -0.1087493896484375, 18.116357803344727, 62.82463073730469, -11.6505126953125, 34.2413215637207, 16.832109451293945, 52.121856689453125, 33.43287658691406, 31.790756225585938, 31.960201263427734, 2.1209659576416016, 38.73677062988281, 40.409725189208984, 2.4333343505859375, 44.14990234375, 64.13419342041016, 2.885040283203125, 89.29953002929688, 52.35679626464844, 57.20153045654297, -3.3163833618164062, 72.0473861694336, 5.996583938598633, 15.175460815429688, 52.945037841796875, 16.41042709350586, -4.115570068359375, 93.81663513183594, 88.04782104492188, 21.214385986328125, 35.7632942199707, 1.5493850708007812, 11.91556167602539, 30.840349197387695, -3.9762744903564453, 29.758834838867188, 5.969474792480469, 55.41260528564453, 58.373748779296875, 15.26424789428711, 34.968894958496094, 8.045465469360352, 53.64714813232422, 14.036056518554688, 1.1550064086914062, 38.960365295410156, 57.238983154296875, 29.711318969726562, 61.187801361083984, 38.494720458984375, -7.1499481201171875, 2.204540252685547, 9.093780517578125, 65.96871948242188, 46.1805419921875, 7.536865234375, -2.4193878173828125, -18.96076011657715], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000348.npy"}
|
|
{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 25.91054916381836, "std": 29.782981872558594, "min": -27.220382690429688, "p10": -3.0449119567871095, "median": 23.10346031188965, "p90": 62.8754852294922, "max": 122.14718627929688, "pos_frac": 0.8125, "sample": [-2.939239501953125, -5.7603607177734375, 20.341949462890625, 29.867887496948242, 11.017967224121094, 16.004180908203125, 51.34400177001953, 2.631624221801758, 23.521312713623047, 43.2725830078125, 27.253093719482422, 35.13758087158203, 19.0849666595459, 20.904876708984375, 15.093160629272461, 4.484392166137695, 14.390039443969727, 7.5252838134765625, 31.127403259277344, 22.68999481201172, 64.3270263671875, 34.45396423339844, -0.9271087646484375, 8.452674865722656, 39.56217956542969, -0.9955711364746094, 44.27919006347656, -3.016387939453125, 28.918901443481445, 33.06177520751953, 122.14718627929688, -3.0571365356445312, -17.762470245361328, 1.1959075927734375, 49.60662078857422, -15.761817932128906, -23.31206512451172, 25.18701934814453, 92.56768798828125, 23.379863739013672, 69.75204467773438, -27.220382690429688, -4.697086334228516, 28.55957794189453, 57.582618713378906, 67.3799057006836, 32.84822082519531, 12.794937133789062, 106.23896789550781, 8.4185791015625, 2.465545654296875, 59.488555908203125, 0.6581268310546875, 23.054174423217773, 16.642822265625, 45.502891540527344, 5.548614501953125, 31.521453857421875, 24.636852264404297, 53.741363525390625, 33.39915466308594, 23.152746200561523, -0.276947021484375, 97.78228759765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000349.npy"}
|
|
{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 27.558395385742188, "std": 28.14163589477539, "min": -22.883224487304688, "p10": -2.161911010742185, "median": 20.798490524291992, "p90": 64.89746856689456, "max": 106.83795166015625, "pos_frac": 0.890625, "sample": [106.83795166015625, -22.577682495117188, 12.447124481201172, 71.49275970458984, 40.84589385986328, 36.215972900390625, 51.94068145751953, 0.347930908203125, -18.495864868164062, 19.54207992553711, 20.182586669921875, 57.144020080566406, -5.7574462890625, 29.141054153442383, 35.70021057128906, 5.31536865234375, 92.15261840820312, 37.419677734375, 48.99639892578125, 50.153472900390625, 68.22037506103516, 19.28280258178711, 11.749979019165039, 8.385398864746094, -3.1619873046875, 53.48692321777344, 10.947776794433594, 0.171600341796875, 43.336524963378906, 45.361412048339844, 40.74045181274414, 4.616634368896484, 30.81183624267578, 20.124366760253906, 0.5397109985351562, 15.524383544921875, 12.457893371582031, 38.07356262207031, -6.964424133300781, 22.7205810546875, 51.506683349609375, 81.0567626953125, 2.1137924194335938, 1.951629638671875, 5.9346923828125, 49.29255294799805, 27.631996154785156, 37.85270690917969, 32.92449951171875, 32.282928466796875, -7.529216766357422, 97.81599426269531, -22.883224487304688, 41.50614929199219, 21.41439437866211, 4.379539489746094, 11.187744140625, 15.599853515625, 5.69610595703125, 39.550453186035156, 14.399467468261719, 15.890602111816406, 19.658763885498047, 79.03175354003906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000350.npy"}
|
|
{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 34.37738037109375, "std": 24.538415908813477, "min": -12.101791381835938, "p10": 6.778041648864749, "median": 33.51721954345703, "p90": 65.31790008544922, "max": 97.97732543945312, "pos_frac": 0.90625, "sample": [28.824079513549805, 43.83856964111328, 63.17999267578125, 61.340087890625, 54.20207977294922, 29.384403228759766, -3.9073753356933594, 28.476829528808594, 62.75160217285156, 55.20367431640625, 17.1981201171875, 44.54259490966797, 73.13967895507812, 69.47627258300781, 57.77592468261719, 44.938255310058594, 40.3175048828125, 66.23414611816406, 48.715667724609375, 41.453948974609375, -5.096168518066406, 23.2576904296875, 50.39729309082031, 50.1893310546875, 24.82303237915039, 22.09006118774414, 13.933555603027344, 40.0792350769043, -12.101791381835938, -0.27850341796875, 15.788818359375, 18.913856506347656, -7.735836029052734, 38.265953063964844, 9.953125, 97.97732543945312, 17.079429626464844, 22.387451171875, 40.6868896484375, 22.87200927734375, 10.000839233398438, 34.713287353515625, 12.78884506225586, 91.57416534423828, 17.372772216796875, 5.579751968383789, 49.959938049316406, 9.574050903320312, 16.510231018066406, -10.298385620117188, 18.102479934692383, 48.22796630859375, 27.119461059570312, 18.404159545898438, 85.15582275390625, 33.02268981933594, 38.49364471435547, 77.79953002929688, 12.628196716308594, 36.54310607910156, 47.17937469482422, 47.543479919433594, 34.011749267578125, 27.5762939453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000351.npy"}
|
|
{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 26.859298706054688, "std": 27.565814971923828, "min": -23.005859375, "p10": -3.8504777908325196, "median": 24.386627197265625, "p90": 62.98154411315919, "max": 132.36622619628906, "pos_frac": 0.828125, "sample": [46.24268341064453, 60.77479934692383, 33.14781951904297, 19.953407287597656, -5.93231201171875, 9.638385772705078, 46.32238006591797, 50.96931457519531, 29.60430908203125, 24.15973663330078, 82.87530517578125, 77.28144836425781, 25.434894561767578, 26.611839294433594, 58.4476432800293, -3.8459529876708984, 6.33598518371582, 83.95123291015625, 11.091032028198242, 29.417329788208008, 24.61351776123047, 132.36622619628906, 25.733322143554688, 63.92729187011719, 14.374496459960938, 21.774505615234375, -3.0359153747558594, -5.8409423828125, 28.445072174072266, 11.764495849609375, 51.21575927734375, 65.16722869873047, 13.286882400512695, 8.207296371459961, 64.51585388183594, 27.862564086914062, -1.499908447265625, 10.719066619873047, 14.43768310546875, 27.41747283935547, 42.78485107421875, 4.39019775390625, 48.20164108276367, -23.005859375, -7.6150054931640625, -3.909637451171875, -3.8524169921875, -2.8913650512695312, 47.9739990234375, 1.0035781860351562, 26.59003257751465, 2.282745361328125, 7.200099945068359, 21.119949340820312, 25.679569244384766, 27.26470184326172, 59.06761169433594, -4.627296447753906, 52.50823974609375, 25.864545822143555, 21.003997802734375, 11.763824462890625, 18.516246795654297, 13.74752426147461], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000352.npy"}
|
|
{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 26.92256736755371, "std": 27.726051330566406, "min": -23.348133087158203, "p10": -4.986428070068359, "median": 24.842851638793945, "p90": 66.68102798461915, "max": 86.840087890625, "pos_frac": 0.84375, "sample": [26.147964477539062, 28.061534881591797, 23.17542266845703, 77.00381469726562, 13.238105773925781, 28.613056182861328, 11.433622360229492, 2.990039825439453, 71.30242919921875, 14.657108306884766, -0.54986572265625, -8.950996398925781, 4.688545227050781, 68.23360443115234, 50.44834899902344, 52.43975067138672, 6.795871734619141, 3.3187408447265625, 86.840087890625, 32.476600646972656, 20.82384490966797, 38.73386001586914, 24.506771087646484, 9.775140762329102, -5.508182525634766, 55.61528015136719, -19.38294219970703, 37.98235321044922, 4.240287780761719, 39.560028076171875, 0.5576267242431641, 4.890586853027344, 63.058349609375, 7.4422454833984375, -3.326751708984375, -23.348133087158203, 54.78294372558594, 4.00372314453125, 48.133148193359375, 1.3969593048095703, 40.9068603515625, 79.16766357421875, 17.654075622558594, 25.178932189941406, 25.584224700927734, 46.822227478027344, 46.66516876220703, 39.76448059082031, -22.748748779296875, 77.02569580078125, 23.91820526123047, -5.078582763671875, 49.918434143066406, 61.23392868041992, 52.43962097167969, 85.44845581054688, 9.363319396972656, 10.411331176757812, 33.942474365234375, -12.68031120300293, 10.165210723876953, 41.39171600341797, -4.771400451660156, 35.020423889160156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000353.npy"}
|
|
{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 31.843639373779297, "std": 21.73491859436035, "min": -7.249900817871094, "p10": 4.536321258544922, "median": 32.944753646850586, "p90": 53.794873046875004, "max": 114.24858856201172, "pos_frac": 0.96875, "sample": [53.550926208496094, 53.89942169189453, 55.585723876953125, 17.13672637939453, 68.40380859375, 18.208587646484375, 48.81755065917969, -0.4352684020996094, 41.03087615966797, 1.3120269775390625, 10.548751831054688, 3.471609115600586, 17.30394744873047, 69.71957397460938, 35.856361389160156, 34.363258361816406, 13.384628295898438, 51.547576904296875, 44.46818542480469, 37.92429733276367, 22.984451293945312, 36.248558044433594, 19.585670471191406, 26.696136474609375, 2.7861099243164062, 40.5184211730957, 49.05297088623047, 34.730140686035156, 34.938880920410156, 22.50539779663086, 34.953704833984375, 8.621816635131836, 49.76865768432617, 47.71726989746094, 35.58186721801758, 27.71484375, -7.249900817871094, 35.03961944580078, 29.252357482910156, 0.58251953125, 23.915695190429688, 19.418235778808594, 43.0025634765625, 5.4423980712890625, 30.32811737060547, 41.68910217285156, 23.482391357421875, 114.24858856201172, 42.36772155761719, 10.958984375, 12.684341430664062, 41.774444580078125, 16.791778564453125, 33.707000732421875, 4.148002624511719, 66.93927001953125, 25.922042846679688, 48.56758117675781, 84.02392578125, 12.483634948730469, 47.1442756652832, 32.1825065612793, 13.0584716796875, 15.583759307861328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000354.npy"}
|
|
{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 33.53764343261719, "std": 29.216278076171875, "min": -6.269006729125977, "p10": 5.371367645263673, "median": 23.807796478271484, "p90": 85.78272781372073, "max": 109.44403076171875, "pos_frac": 0.921875, "sample": [8.259056091308594, 108.544921875, 11.385536193847656, 94.93014526367188, 7.1675262451171875, 33.80738067626953, 34.920738220214844, 81.42091369628906, -0.6379241943359375, 62.79005432128906, 23.917160034179688, 91.78240966796875, -1.6971054077148438, 6.196008682250977, 40.53831481933594, 23.69843292236328, 24.640174865722656, 109.44403076171875, 14.263845443725586, 52.54814147949219, 38.71099853515625, 18.889015197753906, 20.645652770996094, 10.002208709716797, 20.146724700927734, 46.273162841796875, 87.6520767211914, 12.703834533691406, 7.001335144042969, 15.704063415527344, 35.19267654418945, 59.890777587890625, 8.820693969726562, -1.9248199462890625, 37.533512115478516, 5.017950057983398, 62.36311340332031, 93.41938781738281, 30.02923583984375, 104.2304458618164, 16.729270935058594, 42.54022979736328, 3.528308868408203, 10.354339599609375, 51.61359405517578, 61.469825744628906, 23.669164657592773, 43.376739501953125, 12.664302825927734, 39.333335876464844, 22.192790985107422, 22.202590942382812, 53.79499816894531, 26.242164611816406, -6.269006729125977, 18.720382690429688, 18.564666748046875, 15.67596435546875, 22.51589584350586, -2.904865264892578, 25.5660400390625, 19.33411407470703, 36.612449645996094, 28.660018920898438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000355.npy"}
|
|
{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 22.78453826904297, "std": 28.312274932861328, "min": -23.502723693847656, "p10": -6.611024093627929, "median": 13.863311767578125, "p90": 64.6887405395508, "max": 105.53865051269531, "pos_frac": 0.78125, "sample": [36.51915740966797, 8.062568664550781, 39.01640319824219, 19.54924774169922, -4.103244781494141, -0.6641483306884766, 3.8411331176757812, 45.851707458496094, 19.408889770507812, 19.53997802734375, 19.401317596435547, 10.976682662963867, 84.25395202636719, 105.53865051269531, 8.291107177734375, 36.30424880981445, -3.6083946228027344, 2.2262725830078125, -10.832923889160156, -3.3549652099609375, -7.709175109863281, 70.99349212646484, 4.2809295654296875, 14.599273681640625, 38.89509582519531, 46.54853057861328, 13.127349853515625, 94.75796508789062, 5.242095947265625, 26.203290939331055, -5.423255920410156, 34.41032409667969, 12.930999755859375, 18.41058349609375, 43.42463684082031, 4.422191619873047, 52.27471160888672, 73.32984161376953, -18.805513381958008, 14.776016235351562, 37.62997817993164, -1.68292236328125, 8.914249420166016, 57.11656188964844, 6.610540390014648, 3.7933425903320312, 38.405059814453125, -7.120067596435547, 67.9339599609375, -14.764163970947266, 16.632766723632812, -1.82354736328125, -7.7833709716796875, 2.1405792236328125, 10.21513557434082, 11.523033142089844, 11.303977966308594, 30.79082489013672, -23.502723693847656, 53.266632080078125, 74.85772705078125, 53.51606750488281, 12.177558898925781, 45.152305603027344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000356.npy"}
|
|
{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 31.656475067138672, "std": 24.30622100830078, "min": -16.50714111328125, "p10": 3.7257846832275394, "median": 27.9426908493042, "p90": 62.3351318359375, "max": 103.48207092285156, "pos_frac": 0.921875, "sample": [47.675071716308594, 80.26419067382812, 14.826568603515625, 30.66057586669922, 57.65071105957031, 18.01629638671875, 46.03681945800781, 37.92878723144531, 8.588768005371094, 62.483795166015625, 40.59864807128906, 7.9143218994140625, 13.672904968261719, 35.564422607421875, 11.415435791015625, 20.114479064941406, 31.65862274169922, 22.833412170410156, 61.40916442871094, -14.009811401367188, 72.82292175292969, 22.85474395751953, 21.360870361328125, 43.22796630859375, -16.50714111328125, -6.440240859985352, 38.64543151855469, 26.29369354248047, -1.1346092224121094, 61.988250732421875, 61.811004638671875, 18.110427856445312, 39.2869873046875, 10.068199157714844, 33.79133605957031, 1.539022445678711, 23.74390411376953, 103.48207092285156, 40.4505615234375, 77.9210205078125, 31.33167266845703, 3.6156959533691406, 54.40039825439453, 33.98289489746094, 20.100448608398438, 13.566535949707031, 22.903213500976562, 26.55170440673828, 27.837230682373047, 53.71117401123047, 28.04815101623535, 55.3485107421875, 16.774154663085938, 14.01629638671875, 24.31735610961914, 21.94406509399414, 3.9826583862304688, 66.42994689941406, 29.243919372558594, 43.73240661621094, 10.7154541015625, 82.90982055664062, -1.4394683837890625, 33.37043762207031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000357.npy"}
|
|
{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 29.16506004333496, "std": 27.73453140258789, "min": -16.76531982421875, "p10": 3.1565925598144533, "median": 21.16253089904785, "p90": 64.41500778198244, "max": 136.02865600585938, "pos_frac": 0.953125, "sample": [14.812507629394531, 85.35151672363281, 11.074615478515625, 65.65730285644531, 21.74135971069336, 21.35173797607422, 8.148521423339844, 38.023529052734375, 20.384109497070312, 22.172815322875977, 17.60138702392578, 20.145782470703125, 10.00482177734375, 4.1477508544921875, 2.8076820373535156, 60.80914306640625, 4.2230224609375, 9.200180053710938, 41.6064453125, 8.31976318359375, -8.065786361694336, 3.84271240234375, 0.4708404541015625, 15.457813262939453, 20.973323822021484, -16.76531982421875, 46.843780517578125, -1.468414306640625, 67.20028686523438, 80.7544937133789, 34.16926574707031, 44.20089340209961, 44.25614929199219, 3.0763168334960938, 52.397125244140625, 52.69476318359375, 35.211090087890625, 23.194286346435547, 1.1639575958251953, 30.00677490234375, 47.422271728515625, 70.16838073730469, 5.169471740722656, 51.75061798095703, 41.08746337890625, 3.343902587890625, 48.495391845703125, 56.677703857421875, 8.190399169921875, 12.976612091064453, 32.405364990234375, 13.063896179199219, 7.491668701171875, 7.963884353637695, 89.27313232421875, 136.02865600585938, 7.518011093139648, 61.516319274902344, 29.960830688476562, 16.351409912109375, 53.16677474975586, 9.911582946777344, 35.16569519042969, 4.266008377075195], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000358.npy"}
|
|
{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 32.468040466308594, "std": 28.898197174072266, "min": -21.433868408203125, "p10": 0.9169551849365258, "median": 26.37386703491211, "p90": 78.59049682617187, "max": 111.612548828125, "pos_frac": 0.890625, "sample": [-19.326324462890625, 12.871345520019531, 13.216659545898438, 79.98713684082031, -21.433868408203125, 111.612548828125, 4.894989013671875, 78.54132080078125, 25.919349670410156, -11.837993621826172, 24.821014404296875, 6.140296936035156, 30.84234619140625, 58.53763198852539, 12.270811080932617, 25.498382568359375, 35.894813537597656, 10.980804443359375, 26.828384399414062, 52.83196258544922, 38.136749267578125, 23.238033294677734, 82.4369888305664, 28.82408332824707, 3.231830596923828, 28.64767074584961, 18.3051815032959, 31.778846740722656, 31.05462646484375, 14.34930419921875, 63.38201904296875, 51.36953353881836, 107.64772033691406, 25.789962768554688, 22.416763305664062, 41.449440002441406, 33.80097198486328, 47.426361083984375, -2.7488555908203125, 83.63115692138672, 10.873144149780273, 25.85223388671875, 56.38136291503906, 55.43571472167969, 34.97761535644531, 16.96878433227539, 47.82695770263672, -1.3889083862304688, 23.43970489501953, 92.896240234375, -0.07513427734375, -9.336288452148438, 36.98243713378906, 9.900390625, 16.222667694091797, 58.46873474121094, 33.207618713378906, 23.881778717041016, 78.611572265625, 51.963462829589844, 12.293289184570312, 49.858665466308594, 8.130407333374023, 11.322380065917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000359.npy"}
|
|
{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 29.71305274963379, "std": 23.531862258911133, "min": -20.753082275390625, "p10": 5.0232990264892585, "median": 24.8002872467041, "p90": 56.39331817626954, "max": 95.16033935546875, "pos_frac": 0.953125, "sample": [4.830169677734375, 39.234130859375, 25.188697814941406, 18.530426025390625, -9.540491104125977, 62.05389404296875, 16.99237823486328, 46.46466827392578, 40.59130096435547, 29.753982543945312, 1.647735595703125, 39.891937255859375, 18.495399475097656, 26.574886322021484, 45.515174865722656, 14.568929672241211, 9.367162704467773, -6.643028259277344, 42.760894775390625, 16.564727783203125, 72.41969299316406, 31.292993545532227, 17.698640823364258, 42.013916015625, 21.576068878173828, 51.290435791015625, 45.89900207519531, 24.45166778564453, 36.686710357666016, 80.81108856201172, 95.16033935546875, 52.29573059082031, 9.943016052246094, -20.753082275390625, 11.405920028686523, 26.729812622070312, 25.148906707763672, 14.950410842895508, 52.60090637207031, 13.976722717285156, 7.913171768188477, 89.54353332519531, 9.193748474121094, 24.227794647216797, 34.73469543457031, 5.5550384521484375, 5.473934173583984, 22.04400634765625, 56.93402862548828, 24.18017578125, 0.5147857666015625, 50.169891357421875, 0.21121788024902344, 75.91110229492188, 55.13166046142578, 18.279369354248047, 48.03662872314453, 24.27051544189453, 17.23244285583496, 34.626678466796875, 10.730255126953125, 26.0339412689209, 19.13518524169922, 53.109718322753906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000360.npy"}
|
|
{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 28.376157760620117, "std": 25.25600814819336, "min": -18.446258544921875, "p10": 2.174681282043457, "median": 24.963726043701172, "p90": 61.204568481445314, "max": 94.2508773803711, "pos_frac": 0.90625, "sample": [11.15452766418457, 29.362449645996094, 28.358524322509766, 15.974365234375, 9.557243347167969, 23.282732009887695, 34.104331970214844, 30.857091903686523, 93.75801086425781, 27.726318359375, 17.956954956054688, 30.676116943359375, -18.446258544921875, 53.425811767578125, 9.578659057617188, -12.962629318237305, 27.40666389465332, 61.02973937988281, 2.764493942260742, 13.401762008666992, 19.13379669189453, -4.452606201171875, 36.936729431152344, 14.83523178100586, 48.3985595703125, 71.08941650390625, 25.217864990234375, 56.446685791015625, 8.565643310546875, 23.462257385253906, 5.917449951171875, 4.615705490112305, 12.746635437011719, 2.8797378540039062, 72.15229034423828, 50.42926025390625, 6.145210266113281, 61.27949523925781, 94.2508773803711, 18.503448486328125, 49.926849365234375, 56.96149826049805, 79.34115600585938, 42.62201690673828, 6.783668518066406, 39.95410919189453, 42.99768829345703, 36.19049072265625, 25.103057861328125, 2.1152801513671875, 18.88853645324707, 15.60177993774414, 50.01384735107422, 2.313283920288086, 15.475631713867188, 33.12094497680664, 43.0653076171875, -2.725055694580078, 34.2972412109375, 24.82439422607422, 10.275054931640625, -7.484138488769531, -0.2952880859375, 79.18621063232422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000361.npy"}
|
|
{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 33.91130828857422, "std": 27.831872940063477, "min": -17.039688110351562, "p10": 1.8575315475463896, "median": 30.803786277770996, "p90": 72.03871688842774, "max": 103.3932876586914, "pos_frac": 0.90625, "sample": [19.105396270751953, 72.78730010986328, 64.51485443115234, 66.95848846435547, 0.65191650390625, 4.791233062744141, 102.69601440429688, 46.62689971923828, 17.358226776123047, 44.681121826171875, 43.65838623046875, 13.703939437866211, 20.78374481201172, 52.38160705566406, 45.48615264892578, 45.243011474609375, 10.102838516235352, -1.8797931671142578, 44.77399444580078, 33.81256103515625, 18.14476776123047, 15.914276123046875, 13.766220092773438, 8.686492919921875, -17.039688110351562, -0.111785888671875, 28.030868530273438, 62.47285461425781, 4.670633316040039, 103.3932876586914, 77.21343231201172, 35.31709289550781, 7.0967254638671875, 26.68475341796875, 14.935688018798828, 29.961103439331055, 85.93232727050781, -8.765758514404297, 31.646469116210938, 8.25067138671875, 40.66327667236328, 8.59263801574707, 79.35842895507812, 88.92340087890625, 20.725425720214844, 39.706153869628906, 22.090320587158203, 35.9813117980957, 27.770309448242188, 70.29202270507812, 19.835670471191406, 64.808349609375, 32.620567321777344, 35.77143859863281, 17.980018615722656, 63.356224060058594, 54.522796630859375, -12.45074462890625, 46.78718566894531, 21.596038818359375, 55.652183532714844, 37.19273376464844, -2.5680465698242188, 6.6778564453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000362.npy"}
|
|
{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 39.09236145019531, "std": 32.99383544921875, "min": -25.24774169921875, "p10": 6.662544250488282, "median": 30.78058433532715, "p90": 82.38440856933593, "max": 136.0556182861328, "pos_frac": 0.921875, "sample": [-7.625511169433594, 29.94881820678711, 86.52278900146484, 32.6065559387207, -9.41348648071289, 21.973846435546875, 52.105079650878906, 71.37445831298828, 11.870651245117188, 16.341699600219727, 37.914249420166016, 6.2176666259765625, 71.49728393554688, 30.88162612915039, 16.400466918945312, 111.15762329101562, -5.452920913696289, 43.93487548828125, 15.737764358520508, 82.45164489746094, 40.42761993408203, 60.20073699951172, 40.577911376953125, 81.19019317626953, 12.295503616333008, 45.78284454345703, 101.24642944335938, 42.120025634765625, 7.700592041015625, 69.1160888671875, 20.405540466308594, 39.95429229736328, 20.99713134765625, 8.41440200805664, 67.6571044921875, 56.11024475097656, 18.647523880004883, 25.822507858276367, 96.21058654785156, 89.8672103881836, 74.95846557617188, 17.47887420654297, 4.187397003173828, 76.75212097167969, -7.918718338012695, 24.189529418945312, 49.435768127441406, -25.24774169921875, 26.747055053710938, 82.01399230957031, 22.185264587402344, 9.54952621459961, 12.449790954589844, 42.47746276855469, 82.22752380371094, 11.644920349121094, 17.50102996826172, 14.034637451171875, 8.104747772216797, 27.661026000976562, 136.0556182861328, 41.394683837890625, 30.679542541503906, 62.15892028808594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000363.npy"}
|
|
{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 28.14519691467285, "std": 26.875614166259766, "min": -16.416404724121094, "p10": 0.5948112487792986, "median": 22.78134536743164, "p90": 64.51367492675782, "max": 109.86663818359375, "pos_frac": 0.890625, "sample": [15.912740707397461, 39.986061096191406, -13.440353393554688, 3.434030532836914, 3.0102157592773438, 51.31538391113281, -9.099533081054688, 31.08734130859375, 51.546356201171875, 100.69587707519531, 9.164020538330078, 68.11975860595703, 54.508209228515625, 22.815383911132812, 2.2671432495117188, 11.638046264648438, 9.812423706054688, 3.6317672729492188, 48.45440673828125, -8.9290771484375, 34.1547966003418, 28.35049057006836, 43.70948028564453, 9.996219635009766, 16.996788024902344, 15.40629768371582, 14.857616424560547, 75.00103759765625, 28.826473236083984, 30.485353469848633, 57.57599639892578, 65.29391479492188, 15.507469177246094, 37.45849609375, 16.403377532958984, 28.334136962890625, 24.62591552734375, 7.727375030517578, 57.92481231689453, -0.1219024658203125, 2.9997940063476562, 95.44557189941406, 22.74730682373047, 29.985563278198242, 18.540786743164062, -16.416404724121094, 41.208465576171875, 25.934066772460938, 46.25812530517578, 8.6070556640625, 17.595531463623047, 44.495147705078125, -5.322004318237305, 23.660728454589844, 109.86663818359375, 12.286453247070312, 21.5384521484375, 66.35963439941406, -0.20252037048339844, 62.693115234375, 13.666557312011719, 21.424072265625, 27.541885375976562, 5.934234619140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000364.npy"}
|
|
{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 27.50690460205078, "std": 26.08824348449707, "min": -5.658348083496094, "p10": 3.131523132324219, "median": 19.568644523620605, "p90": 58.29841003417969, "max": 128.43862915039062, "pos_frac": 0.90625, "sample": [53.93779754638672, -5.658348083496094, 7.688468933105469, 5.789707183837891, 7.729269027709961, 11.025203704833984, 46.33349609375, 3.9732894897460938, 14.518112182617188, 7.822540283203125, 43.38429260253906, 41.79015350341797, -2.915803909301758, 58.812835693359375, 3.7489471435546875, 8.9847412109375, 40.783843994140625, 70.67962646484375, 48.12724304199219, 53.176292419433594, 20.250473022460938, 15.10954475402832, 26.589431762695312, 2.866912841796875, 11.82094955444336, 10.155525207519531, 18.9442081451416, 27.18777084350586, 42.228755950927734, -5.589946746826172, 9.177810668945312, -1.9217338562011719, 24.202720642089844, 20.458637237548828, 30.010177612304688, 53.386192321777344, 10.297832489013672, 20.19308090209961, 5.1278839111328125, 4.37451171875, 21.971694946289062, 9.437606811523438, 71.8671875, 4.656946182250977, 15.061933517456055, 77.63752746582031, 40.92424392700195, 18.072891235351562, 24.442184448242188, 11.511520385742188, 41.56005859375, 56.64197540283203, -1.1872367858886719, 37.597068786621094, 16.73623275756836, 27.712642669677734, 128.43862915039062, 87.78048706054688, 12.957847595214844, -2.4200782775878906, 64.87901306152344, 57.09808349609375, 16.049835205078125, 56.41107940673828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000365.npy"}
|
|
{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 29.27224349975586, "std": 23.412845611572266, "min": -7.074731826782227, "p10": 3.2916528701782237, "median": 23.773966789245605, "p90": 67.7847999572754, "max": 89.33551025390625, "pos_frac": 0.953125, "sample": [14.387046813964844, 23.29311180114746, 49.82225799560547, 12.088119506835938, 17.66717529296875, 6.241230010986328, 9.892265319824219, 17.516460418701172, 25.743484497070312, 89.33551025390625, 13.298627853393555, 5.815338134765625, 66.32880401611328, 69.87577819824219, 37.616275787353516, 26.30011749267578, 20.481414794921875, 0.7433605194091797, -7.074731826782227, 9.769214630126953, 43.19289779663086, 15.43975830078125, 73.14590454101562, -3.6774463653564453, 68.40879821777344, 74.3461685180664, 42.37000274658203, 16.313278198242188, 41.846771240234375, 24.25482177734375, 20.484464645385742, 2.83807373046875, 5.516761779785156, 15.576057434082031, 45.2884521484375, 1.9948959350585938, 79.76673889160156, 47.007789611816406, 26.43572998046875, 20.292150497436523, 32.02366638183594, 35.00257110595703, 13.124090194702148, 0.6578655242919922, 26.386150360107422, 11.051706314086914, 60.986602783203125, -5.968929290771484, 37.994659423828125, 71.72183227539062, 60.21693420410156, 22.51660919189453, 46.38819885253906, 15.808242797851562, 59.18450927734375, 32.438568115234375, 4.350004196166992, 39.747100830078125, 6.729213714599609, 15.558197021484375, 30.222536087036133, 29.529451370239258, 43.890289306640625, 13.880531311035156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000366.npy"}
|
|
{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 25.49578094482422, "std": 22.78716278076172, "min": -16.36359405517578, "p10": 1.581372070312502, "median": 21.96495819091797, "p90": 53.31651763916016, "max": 100.38272094726562, "pos_frac": 0.90625, "sample": [3.554859161376953, 9.706878662109375, 49.538902282714844, 7.756618499755859, 98.96357727050781, 20.218149185180664, 18.220657348632812, 36.6668815612793, 58.14189147949219, 4.6348724365234375, 25.79376220703125, -0.5747013092041016, 6.7929229736328125, 33.714447021484375, 31.281742095947266, 51.043739318847656, 19.711441040039062, 8.3956298828125, 39.60304260253906, 0.7355918884277344, 28.413070678710938, 51.85112762451172, 19.460166931152344, 14.496452331542969, 17.652923583984375, 36.22486877441406, 54.37632751464844, 17.973268508911133, 50.02832794189453, 7.148406982421875, 45.879600524902344, 21.42230224609375, 24.876541137695312, 27.127952575683594, 29.681991577148438, 18.14281463623047, 46.75511932373047, 28.476343154907227, 61.14869689941406, 16.684528350830078, 23.46587562561035, 6.340545654296875, -9.556236267089844, 16.161235809326172, 20.82232666015625, -6.778339385986328, -13.861785888671875, 34.000274658203125, 53.944541931152344, 22.507614135742188, 13.839996337890625, -13.723419189453125, 35.64936828613281, 18.920440673828125, 26.42751693725586, 36.23145294189453, 11.363361358642578, 5.373504638671875, 56.01271057128906, 26.691253662109375, 8.078872680664062, -16.36359405517578, 100.38272094726562, 34.07807159423828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000367.npy"}
|
|
{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 30.921649932861328, "std": 24.50312042236328, "min": -7.295450210571289, "p10": 5.002363395690918, "median": 28.262661933898926, "p90": 59.38735656738282, "max": 111.9737548828125, "pos_frac": 0.9375, "sample": [30.041566848754883, -5.949703216552734, 32.02101516723633, 18.378494262695312, 39.430450439453125, 47.80039978027344, 7.849964141845703, -4.523529052734375, 60.02703094482422, 19.936126708984375, 40.369449615478516, 35.22428894042969, 66.21086120605469, 6.2208251953125, 31.407825469970703, 29.254825592041016, 28.019508361816406, 24.279327392578125, 6.697532653808594, 28.481002807617188, 57.89478302001953, 42.19860076904297, 26.007293701171875, 36.8953857421875, 81.37664794921875, 6.940704345703125, 16.81249237060547, -1.6924495697021484, 8.310380935668945, -7.295450210571289, 51.43663024902344, 2.824554443359375, 30.786827087402344, 15.267852783203125, 1.5644989013671875, 19.566078186035156, 42.324920654296875, 28.044321060180664, 22.979251861572266, 18.70384979248047, 12.143255233764648, 82.40228271484375, 26.04241180419922, 48.16851806640625, 33.80989074707031, 42.90283966064453, 111.56769561767578, 16.953643798828125, 4.948246002197266, 10.975494384765625, 31.85192108154297, 64.1006851196289, 34.07316970825195, 40.68836212158203, 55.62331771850586, 5.128637313842773, 25.357276916503906, 111.9737548828125, 46.138084411621094, 41.79011154174805, 25.29570770263672, 20.799591064453125, 30.54568862915039, 13.58050537109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000368.npy"}
|
|
{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 25.208810806274414, "std": 22.390411376953125, "min": -21.951133728027344, "p10": -1.8395601272582998, "median": 23.230974197387695, "p90": 55.85286407470704, "max": 81.6043701171875, "pos_frac": 0.859375, "sample": [2.5762939453125, 8.389242172241211, 81.6043701171875, 10.847883224487305, -6.916481018066406, 9.523843765258789, 10.363521575927734, 51.64479446411133, 43.43490982055664, 68.991943359375, 40.601871490478516, 4.1589508056640625, 8.6617431640625, 11.801506042480469, 28.392303466796875, 56.69672393798828, 25.354019165039062, 48.960147857666016, -0.3505744934082031, 23.723587036132812, 37.44780731201172, 22.991470336914062, 20.232025146484375, 13.873046875, 42.456085205078125, -6.72467041015625, 2.0375595092773438, 12.297264099121094, 14.679039001464844, 26.275367736816406, 45.99998474121094, 51.90290069580078, 53.88385772705078, -21.951133728027344, 12.896453857421875, 35.27366256713867, 47.98780822753906, 35.246925354003906, 42.028114318847656, 66.56719970703125, 58.38742446899414, -5.5197601318359375, 72.32223510742188, 16.83415985107422, 60.91740417480469, -6.520355224609375, 28.391510009765625, -5.489295959472656, -2.238534927368164, 38.99806213378906, 23.470478057861328, 22.75060272216797, 34.01702880859375, 23.501686096191406, 18.41126251220703, 12.991092681884766, 26.633316040039062, 11.710624694824219, 34.88360595703125, 17.403518676757812, 1.3875885009765625, -0.9086189270019531, 33.896602630615234, 13.270896911621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000369.npy"}
|
|
{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 24.929824829101562, "std": 22.07652473449707, "min": -8.30179214477539, "p10": -1.249482154846191, "median": 24.80476188659668, "p90": 56.0314811706543, "max": 88.1761245727539, "pos_frac": 0.84375, "sample": [24.463077545166016, 31.512672424316406, -0.7913055419921875, 6.6090240478515625, 88.1761245727539, 6.6258544921875, 27.172164916992188, 14.891578674316406, 1.2645626068115234, 20.239822387695312, 59.61151123046875, 19.41473388671875, 5.7885589599609375, 25.848495483398438, 15.803237915039062, 32.13691711425781, 43.532073974609375, 7.424686431884766, 27.35149383544922, 57.56254577636719, 49.655372619628906, -0.820159912109375, 60.42750930786133, 15.587699890136719, 25.146446228027344, 29.33121109008789, 24.358718872070312, -1.9532032012939453, 4.351402282714844, 30.20520782470703, 26.863624572753906, 7.086944580078125, 10.04330062866211, 48.702369689941406, -8.30179214477539, 29.775592803955078, 25.79108428955078, 39.50360107421875, 0.959747314453125, -1.4334774017333984, 42.855926513671875, 41.21293640136719, -4.111759185791016, 27.522747039794922, 55.06756591796875, 23.076732635498047, 16.180801391601562, 42.34852600097656, 41.3656120300293, -1.892059326171875, 1.7277679443359375, -5.2489471435546875, 35.8055419921875, 65.14761352539062, -0.0966339111328125, 10.581398010253906, 14.374137878417969, -3.6420059204101562, 1.2325172424316406, 79.72041320800781, 40.91602325439453, 47.075340270996094, 56.44458770751953, 37.924957275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000370.npy"}
|
|
{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 27.12832260131836, "std": 28.606050491333008, "min": -31.03631591796875, "p10": -0.13489761352538931, "median": 20.910491943359375, "p90": 57.38357696533203, "max": 122.3263931274414, "pos_frac": 0.890625, "sample": [57.01066589355469, 52.970970153808594, 17.079803466796875, 7.873357772827148, -31.03631591796875, 1.2091064453125, 122.3263931274414, 19.614110946655273, 56.56268310546875, 47.46638488769531, 6.6015777587890625, 7.119634628295898, 31.474319458007812, 35.06859588623047, -8.13690185546875, 15.881515502929688, 29.016944885253906, 12.28373908996582, 73.44371032714844, 31.80748748779297, 5.346590042114258, 16.263587951660156, 25.898239135742188, -16.795242309570312, -5.940399169921875, 49.81414031982422, 86.36447143554688, 39.192169189453125, 30.00774383544922, 19.317758560180664, 119.3509750366211, 10.586318969726562, 21.609832763671875, 35.99171447753906, -0.7108993530273438, 11.631195068359375, 30.992828369140625, 14.336158752441406, 9.765689849853516, 58.87687683105469, -16.180448532104492, 22.981300354003906, 56.479339599609375, 20.09495735168457, 5.943946838378906, 53.287086486816406, 25.88146209716797, 2.639923095703125, 55.85234069824219, 17.363372802734375, 26.846649169921875, 3.6758041381835938, 36.48362350463867, 20.211151123046875, 37.193363189697266, 4.428619384765625, 29.746440887451172, 4.241209030151367, -3.154815673828125, 59.722198486328125, 57.54339599609375, 56.209686279296875, 3.9907684326171875, 7.193675994873047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000371.npy"}
|
|
{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 31.275503158569336, "std": 29.992393493652344, "min": -19.61409568786621, "p10": 4.132645034790039, "median": 27.652746200561523, "p90": 69.65729370117188, "max": 135.39608764648438, "pos_frac": 0.90625, "sample": [29.2171630859375, 49.70752716064453, 96.47564697265625, 25.412734985351562, 4.5763092041015625, 38.276214599609375, 62.112361907958984, 39.3125, 13.541885375976562, 9.277572631835938, 69.19659423828125, 28.986572265625, 9.44537353515625, 57.22441101074219, 5.6750030517578125, 17.01398468017578, 4.056859970092773, 26.318920135498047, 51.591949462890625, -19.61409568786621, -8.25579833984375, 38.416378021240234, 76.60188293457031, 16.54473114013672, 98.43962860107422, 34.143882751464844, 17.880035400390625, 24.880577087402344, 6.8389739990234375, 30.207889556884766, 8.3909912109375, 17.182327270507812, 10.424461364746094, 14.168731689453125, 43.51213073730469, -0.16347312927246094, 70.57707977294922, 43.42436981201172, 10.984670639038086, 29.484840393066406, 19.848724365234375, 43.831947326660156, 31.74831771850586, -18.470428466796875, 51.76011276245117, 69.854736328125, 46.61468505859375, 42.15416717529297, -5.2023773193359375, 30.385826110839844, 8.13385009765625, 57.039947509765625, 42.24055480957031, 11.678060531616211, 101.29817199707031, 4.400720596313477, 19.95387077331543, 29.338043212890625, 4.309476852416992, 135.39608764648438, -13.437154769897461, 11.089118957519531, 63.99913024902344, 12.176856994628906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000372.npy"}
|
|
{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 32.979225158691406, "std": 26.324975967407227, "min": -15.974594116210938, "p10": 5.735283946990967, "median": 33.9997673034668, "p90": 68.68919525146485, "max": 119.2515869140625, "pos_frac": 0.953125, "sample": [1.4300537109375, 7.053707122802734, 6.483757019042969, 20.68431854248047, 17.25653648376465, 77.83971405029297, 45.43951416015625, 29.25177001953125, 25.178192138671875, 24.487518310546875, 50.22695541381836, 34.428382873535156, 12.08270263671875, 6.286460876464844, 33.08445739746094, 41.46868133544922, 5.563900947570801, 73.68760681152344, 41.154335021972656, 0.071685791015625, 2.9240798950195312, 43.888214111328125, 43.90089416503906, -14.959503173828125, 19.691810607910156, 7.836328506469727, 30.249027252197266, 41.01576232910156, 14.04473876953125, 12.272258758544922, 8.656867980957031, 61.109893798828125, 34.53313446044922, 8.617584228515625, 76.3611068725586, 6.1351776123046875, 41.07006072998047, 76.60484313964844, 11.82600212097168, 10.083709716796875, 53.48765563964844, 39.69426727294922, 51.28845977783203, 80.08770751953125, 69.53501892089844, 41.46836853027344, 41.53142547607422, 119.2515869140625, 63.385459899902344, 40.953826904296875, -11.649566650390625, 66.71560668945312, 14.474090576171875, 36.662071228027344, 8.232536315917969, 52.598167419433594, 54.54359436035156, 12.813074111938477, 44.98627471923828, 33.57115173339844, 54.998565673828125, 45.51830291748047, -15.974594116210938, 23.475013732910156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000373.npy"}
|
|
{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 29.80048942565918, "std": 25.573938369750977, "min": -10.293472290039062, "p10": 2.551044845581056, "median": 25.200302124023438, "p90": 68.3887809753418, "max": 98.57064056396484, "pos_frac": 0.9375, "sample": [14.249130249023438, 25.256492614746094, 8.222152709960938, 80.37310791015625, 27.479324340820312, 50.09088134765625, 8.918075561523438, 36.79203796386719, 18.583465576171875, 2.053607940673828, 11.327346801757812, 1.4392318725585938, 22.688491821289062, 1.2692070007324219, 29.412872314453125, 8.132926940917969, 45.95409393310547, 28.9466552734375, 65.75935363769531, 10.767353057861328, 34.481597900390625, 18.22634506225586, 84.5960922241211, 21.93902587890625, -0.5776939392089844, 86.07693481445312, 21.475099563598633, 28.925411224365234, -6.31005859375, 26.16046905517578, 56.438232421875, 44.25038146972656, 7.1015625, 71.67393493652344, -9.534576416015625, 60.68998718261719, 4.24755859375, 56.488250732421875, 9.864889144897461, 5.43878173828125, 14.245121002197266, 33.498085021972656, 15.643535614013672, 14.492563247680664, 7.322187423706055, 25.14411163330078, 33.61668395996094, 98.57064056396484, 35.62657165527344, 31.855239868164062, 4.7896270751953125, 77.10850524902344, 15.142532348632812, 60.60385513305664, 35.56095886230469, 20.08953857421875, -10.293472290039062, 55.339210510253906, 41.470603942871094, 17.495840072631836, 55.74018859863281, 3.71173095703125, 31.57379913330078, 69.51567840576172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000374.npy"}
|
|
{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 29.460506439208984, "std": 25.48676109313965, "min": -15.105438232421875, "p10": -1.250460052490234, "median": 26.082965850830078, "p90": 63.993035888671876, "max": 85.49844360351562, "pos_frac": 0.859375, "sample": [68.7127914428711, 45.65232467651367, 3.34381103515625, 24.29412841796875, 27.196617126464844, 2.5595130920410156, 64.01866149902344, 17.967330932617188, -1.3378105163574219, 9.834980010986328, 7.662618637084961, 41.35746765136719, 45.83842468261719, 1.6017532348632812, -4.210411071777344, 49.3707275390625, 56.394142150878906, 15.57371711730957, 53.95439910888672, 24.969314575195312, 13.921844482421875, 47.04180908203125, -3.907867431640625, 30.94115447998047, 10.274112701416016, 11.437118530273438, 77.04570007324219, -6.263500213623047, 3.972320556640625, 44.66160583496094, 28.608245849609375, 15.98626708984375, 60.80168151855469, 56.916229248046875, -1.0466423034667969, 39.12333679199219, 63.93324279785156, 35.52314758300781, 17.960403442382812, 72.82694244384766, 17.732582092285156, 16.090051651000977, 15.677452087402344, -15.105438232421875, 62.26679992675781, 56.09258270263672, 57.310340881347656, -6.723670959472656, 85.49844360351562, 64.96241760253906, 41.083099365234375, -9.657154083251953, -0.483306884765625, 65.04054260253906, 31.827346801757812, 60.77239227294922, 3.279052734375, 45.34300994873047, 20.347251892089844, 4.628589630126953, 19.40142822265625, 32.802635192871094, 37.57896041870117, 5.195331573486328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000375.npy"}
|
|
{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 32.31665802001953, "std": 27.68313217163086, "min": -17.767858505249023, "p10": 4.782272529602051, "median": 25.43633270263672, "p90": 78.7999481201172, "max": 108.45077514648438, "pos_frac": 0.953125, "sample": [30.78870391845703, 91.017333984375, 15.084091186523438, 18.57904052734375, 94.41456604003906, 31.848915100097656, 76.53936767578125, -5.45707893371582, 14.278335571289062, 7.4388580322265625, 34.202667236328125, 3.3204479217529297, 57.79142761230469, 8.06658935546875, 2.8897628784179688, 42.174560546875, 34.785911560058594, 4.9830780029296875, 26.278663635253906, 19.183774948120117, 15.234130859375, 4.979375839233398, 12.56024169921875, 79.76876831054688, 64.04920959472656, 18.013656616210938, 38.20903015136719, 41.26396179199219, 64.29618072509766, 10.331554412841797, 57.73289489746094, 98.76008605957031, 4.6977996826171875, 62.70771789550781, 48.31135559082031, 10.810050964355469, 42.14311981201172, 49.56492614746094, 27.318405151367188, 32.091007232666016, 24.514556884765625, 31.087421417236328, 18.893692016601562, 23.870758056640625, 29.15838623046875, 16.341785430908203, 108.45077514648438, 25.007614135742188, -4.581203460693359, 1.6901626586914062, 23.30120849609375, -17.767858505249023, 80.04469299316406, 25.86505126953125, 16.463485717773438, 80.43817138671875, 21.932594299316406, 40.860191345214844, 14.012739181518555, 5.353424072265625, 27.792692184448242, 22.892772674560547, 52.74588394165039, 8.844505310058594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000376.npy"}
|
|
{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 29.981285095214844, "std": 27.36408042907715, "min": -19.292030334472656, "p10": -0.8770988464355468, "median": 27.909320831298828, "p90": 68.54915161132813, "max": 102.1872787475586, "pos_frac": 0.859375, "sample": [35.38666534423828, 64.89399719238281, 32.54766845703125, 14.701370239257812, 15.66533088684082, 7.442819595336914, 69.85061645507812, 19.727630615234375, 69.82215881347656, 3.5503158569335938, 70.29812622070312, 61.113677978515625, 19.569137573242188, 91.8385009765625, 11.620828628540039, 49.186920166015625, 29.970108032226562, 62.345977783203125, 29.78289031982422, 2.568521499633789, 42.22199249267578, 60.34675598144531, 8.969879150390625, -0.3541679382324219, 69.43756103515625, -0.79083251953125, 53.01043701171875, -3.7694854736328125, 66.4761962890625, 7.680671691894531, 29.90346908569336, 22.127870559692383, 10.387931823730469, 20.67388916015625, 4.300563812255859, 44.058998107910156, 51.35148620605469, 18.802207946777344, 35.869720458984375, 51.80436706542969, 32.89330291748047, -0.9250564575195312, -19.292030334472656, 102.1872787475586, 72.6817398071289, 15.734285354614258, 32.842437744140625, -12.344955444335938, 15.22348403930664, 65.62519836425781, 26.035751342773438, 62.50498962402344, 0.6925582885742188, 0.65338134765625, -0.9140701293945312, 31.34168243408203, 12.946357727050781, 12.664794921875, 36.67498779296875, -6.523859024047852, 41.26835632324219, 17.58979034423828, 37.485687255859375, -12.636665344238281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000377.npy"}
|
|
{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 31.929780960083008, "std": 25.704288482666016, "min": -18.05096435546875, "p10": 4.556723213195802, "median": 30.30886459350586, "p90": 66.19393386840821, "max": 106.23587036132812, "pos_frac": 0.921875, "sample": [36.090904235839844, 19.491060256958008, 94.9383544921875, 87.42268371582031, 52.65171813964844, 17.45719337463379, 18.324764251708984, 26.01441192626953, 19.277450561523438, 19.96255874633789, 36.29659652709961, 30.33477783203125, 9.799728393554688, 30.94500732421875, 65.3623046875, 3.9567718505859375, 6.028778076171875, 37.54441833496094, 20.425003051757812, 42.355018615722656, 18.577470779418945, -1.8303947448730469, 14.957115173339844, 34.417724609375, 14.597251892089844, 16.995012283325195, 15.489023208618164, 56.974205017089844, 26.853242874145508, 36.395263671875, 47.20026397705078, 41.17205810546875, 44.34119415283203, -16.407926559448242, -8.339813232421875, 66.55034637451172, 30.28295135498047, 91.8892822265625, 0.3084545135498047, 35.387176513671875, 32.308250427246094, 37.047115325927734, 24.327674865722656, 77.02899169921875, 8.533241271972656, -8.765096664428711, 62.78117370605469, 32.3831787109375, 106.23587036132812, 28.539640426635742, 26.861106872558594, 50.804908752441406, 31.084609985351562, 35.016937255859375, 36.95209503173828, 35.234519958496094, 27.419891357421875, 9.271064758300781, 15.372398376464844, 24.01367950439453, 50.82269287109375, -18.05096435546875, 5.956609725952148, 71.83695983886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000378.npy"}
|
|
{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 33.164710998535156, "std": 28.849937438964844, "min": -19.288570404052734, "p10": 1.7665946960449235, "median": 26.000293731689453, "p90": 76.71302490234378, "max": 115.73968505859375, "pos_frac": 0.921875, "sample": [63.13976287841797, 40.759185791015625, 62.616981506347656, 39.84925842285156, 12.284912109375, 8.578609466552734, 17.686325073242188, 69.91087341308594, 31.78515625, 24.575767517089844, 52.85936737060547, 49.386566162109375, 4.402587890625, 46.82159423828125, 64.21173095703125, 39.20353698730469, 17.456558227539062, 79.02536010742188, 13.342605590820312, 37.55963897705078, 46.09337615966797, 42.672420501708984, 18.56385040283203, 19.50729751586914, 30.423095703125, 17.499052047729492, 31.93499755859375, 10.788349151611328, 6.835849761962891, 0.64935302734375, 71.92301940917969, 32.184303283691406, 1.0992050170898438, 39.106475830078125, 56.549072265625, 78.76588439941406, 17.504440307617188, 64.04263305664062, 102.9554443359375, 11.732852935791016, 15.868997573852539, 85.20124816894531, 21.54422378540039, -2.892721176147461, 52.44715118408203, -4.826530456542969, 9.361785888671875, 27.34355926513672, 85.07217407226562, 5.734722137451172, -19.288570404052734, 19.604217529296875, 46.6270751953125, 24.657028198242188, 115.73968505859375, 3.3238372802734375, -6.63385009765625, 22.576248168945312, 15.39478874206543, 12.472591400146484, 36.775665283203125, 81.39291381835938, -8.295490264892578, 7.053428649902344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000379.npy"}
|
|
{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 34.69824981689453, "std": 30.75237464904785, "min": -20.6903076171875, "p10": -6.3120891571044915, "median": 33.50398635864258, "p90": 84.15966720581058, "max": 96.36842346191406, "pos_frac": 0.828125, "sample": [36.77935028076172, 30.127601623535156, -2.66107177734375, 54.77996826171875, 37.14306640625, 8.647525787353516, 15.576148986816406, 89.61991882324219, 32.42502212524414, 15.853523254394531, 13.070671081542969, 94.654052734375, -1.1408157348632812, 96.36842346191406, 90.03887939453125, 5.868480682373047, 15.971382141113281, 48.657012939453125, 61.82440185546875, 15.7677001953125, 59.865657806396484, -14.577285766601562, 11.29275894165039, 34.265525817871094, 22.707841873168945, -7.709461212158203, 61.84614562988281, 12.03424072265625, 61.73707580566406, 86.99866485595703, 39.23832702636719, 49.59512710571289, 45.520042419433594, 32.74244689941406, 77.53533935546875, 49.64788055419922, 37.28960418701172, 30.775894165039062, 50.422725677490234, 30.08545684814453, 89.87490844726562, -6.675655364990234, -12.544595718383789, -3.5836753845214844, -20.6903076171875, 44.080162048339844, 63.840179443359375, 40.304969787597656, 13.610992431640625, -14.939102172851562, 76.28779602050781, 7.322271347045898, 64.89537811279297, 25.188079833984375, 44.405975341796875, -5.463768005371094, 57.98272705078125, 21.801584243774414, 29.4882755279541, -14.9146728515625, 28.190994262695312, 38.94464111328125, 87.27117919921875, 35.32438659667969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000380.npy"}
|
|
{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 34.22349548339844, "std": 30.587743759155273, "min": -29.2945556640625, "p10": 4.905857086181641, "median": 28.489730834960938, "p90": 71.69346618652344, "max": 106.49234008789062, "pos_frac": 0.90625, "sample": [5.485553741455078, 40.08418273925781, 12.517173767089844, 88.63264465332031, 67.167236328125, 56.198089599609375, 70.08819580078125, 13.097366333007812, 26.58045768737793, 64.53768157958984, 56.66193389892578, 31.66552734375, 19.156116485595703, 39.68806457519531, 106.49234008789062, -13.664154052734375, 66.17426300048828, 47.30772399902344, 97.54512023925781, 72.38143920898438, 101.17710876464844, 64.62167358398438, 8.575063705444336, 5.531028747558594, -6.380558013916016, 36.501102447509766, 91.9439697265625, 24.61758804321289, -15.995826721191406, 35.65800476074219, 8.335714340209961, 52.12218475341797, 32.23017120361328, 25.620254516601562, 9.2965087890625, 5.182605743408203, 28.270164489746094, 31.402313232421875, 33.505069732666016, 9.154388427734375, 35.92595672607422, 27.009109497070312, 59.49969482421875, 64.68406677246094, 30.649940490722656, 28.70929718017578, -8.654022216796875, 22.428585052490234, 64.10888671875, 19.650501251220703, 22.736692428588867, 84.89679718017578, 18.06048583984375, 8.855438232421875, 4.787250518798828, -15.617919921875, 24.267105102539062, 60.47907257080078, 56.92195129394531, 11.219526290893555, 25.853214263916016, -29.2945556640625, 7.3427276611328125, 16.618553161621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000381.npy"}
|
|
{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 35.52247619628906, "std": 28.128053665161133, "min": -26.462600708007812, "p10": 2.0245948791503907, "median": 31.464111328125, "p90": 70.64026641845703, "max": 121.1675796508789, "pos_frac": 0.90625, "sample": [121.1675796508789, 41.5135498046875, 21.635019302368164, 24.646453857421875, 15.178705215454102, 83.15675354003906, 1.9417190551757812, 13.667381286621094, 67.16828918457031, 12.573192596435547, 46.59938049316406, 59.36024475097656, 56.225013732910156, 21.66143798828125, 31.4892578125, 44.88352966308594, 50.33984375, -3.3442306518554688, 49.60401153564453, 37.314300537109375, 35.35398864746094, 28.814788818359375, 18.358444213867188, 27.573497772216797, 29.165969848632812, -0.4141998291015625, 31.603029251098633, 27.368804931640625, 15.11041259765625, 12.662918090820312, 72.60133361816406, -3.7425594329833984, -7.725971221923828, 71.0333251953125, 67.15254211425781, -26.462600708007812, 51.63037109375, 41.21087646484375, 8.85174560546875, 72.69330596923828, 9.123283386230469, 31.43896484375, 69.72312927246094, -6.02342414855957, 24.485702514648438, 15.274436950683594, 56.54669189453125, 67.88784790039062, 21.617431640625, 63.82066345214844, 98.2291030883789, 95.4526596069336, 36.48772430419922, 13.271631240844727, 34.766883850097656, 41.43177795410156, 13.573780059814453, 32.07939529418945, 39.57929992675781, 67.76412963867188, 17.3049373626709, 28.58074188232422, 2.2179718017578125, 29.1822509765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000382.npy"}
|
|
{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 32.29367446899414, "std": 25.84360122680664, "min": -10.210609436035156, "p10": 3.504080963134766, "median": 28.656344413757324, "p90": 60.560729598999025, "max": 118.09507751464844, "pos_frac": 0.9375, "sample": [31.258056640625, 12.089302062988281, 35.0657958984375, -7.551307678222656, 36.99517059326172, 9.071897506713867, -10.210609436035156, 95.13917541503906, 49.81453323364258, 4.540531158447266, 59.43473815917969, 20.677379608154297, 6.166004180908203, 15.990211486816406, -0.6606597900390625, 25.726707458496094, 118.09507751464844, 3.4656906127929688, 39.260040283203125, 23.19472885131836, 60.32110595703125, 17.005393981933594, 21.96099853515625, 49.767364501953125, 35.96752166748047, -6.426368713378906, 84.87579345703125, 29.436281204223633, 9.017868041992188, 9.170150756835938, 86.01311492919922, 11.166498184204102, 37.42559814453125, 38.30158996582031, 42.12560272216797, 21.313262939453125, 55.32160186767578, 21.009265899658203, 60.66342544555664, 49.108238220214844, 8.198577880859375, 5.0513458251953125, 27.876407623291016, 43.93852615356445, 53.158958435058594, 26.129150390625, 3.593658447265625, 74.11213684082031, 23.043624877929688, 40.99272918701172, 64.09654235839844, 36.67255401611328, 3.274993896484375, 39.62091064453125, 46.688743591308594, 37.04084014892578, 29.816253662109375, 58.84928894042969, 23.645793914794922, 23.5369873046875, 9.443618774414062, 1.9728240966796875, 25.59356689453125, 59.34028244018555], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000383.npy"}
|
|
{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 32.187374114990234, "std": 31.6424560546875, "min": -13.718070983886719, "p10": -0.3848163604736301, "median": 25.140990257263184, "p90": 73.91573715209962, "max": 157.997314453125, "pos_frac": 0.890625, "sample": [18.54424285888672, 26.028648376464844, 44.69647979736328, 3.268360137939453, 10.227455139160156, 11.081146240234375, -13.718070983886719, -6.3627166748046875, 108.43299102783203, 23.595844268798828, 84.11202239990234, 37.15032958984375, 13.96595573425293, 5.48431396484375, 39.01997756958008, 10.315673828125, 19.605148315429688, 10.772771835327148, 58.83030700683594, 25.765869140625, 41.90347671508789, -9.481088638305664, 24.516111373901367, 38.433135986328125, 2.4447898864746094, -8.026580810546875, 6.455619812011719, -3.4098892211914062, -2.651660919189453, 37.040977478027344, 57.007301330566406, 44.271671295166016, 8.063863754272461, 26.924236297607422, 81.5328140258789, 2.2607688903808594, 67.20811462402344, 47.07624053955078, 59.651649475097656, 3.446533203125, 24.05535888671875, 16.318954467773438, 13.790046691894531, 30.583656311035156, 15.8724365234375, 75.46978759765625, 96.46229553222656, 40.20994567871094, 33.844024658203125, 28.15418243408203, 70.28961944580078, 8.748832702636719, 49.542694091796875, 157.997314453125, 49.74736022949219, -1.5186386108398438, 22.966320037841797, 40.582061767578125, 5.452470779418945, 23.592792510986328, 44.4266357421875, 87.12326049804688, 16.067001342773438, 54.72859191894531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000384.npy"}
|
|
{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 28.625473022460938, "std": 22.074491500854492, "min": -14.95257568359375, "p10": 0.14277954101562523, "median": 29.770654678344727, "p90": 56.98138771057129, "max": 76.28366088867188, "pos_frac": 0.921875, "sample": [-1.24151611328125, 13.163009643554688, 1.5127029418945312, 49.446475982666016, 57.29775619506836, 40.78887939453125, 55.44786071777344, 11.94818115234375, 39.93852996826172, 55.56272888183594, 40.608184814453125, -4.261199951171875, -6.8197021484375, 50.5512580871582, 62.42308044433594, 8.198768615722656, 54.001712799072266, 23.151386260986328, 47.11916732788086, 16.58849334716797, 60.47251510620117, 8.56917953491211, -14.95257568359375, 45.45432662963867, 2.5701980590820312, 52.02168273925781, 39.25608444213867, 24.981971740722656, 15.413238525390625, 46.47840881347656, 27.046218872070312, 11.313140869140625, 0.380584716796875, 56.243194580078125, 71.95955657958984, 39.00798416137695, 0.005603790283203125, 8.923957824707031, 57.624168395996094, 41.35539245605469, 2.5296478271484375, 0.040863037109375, 32.949981689453125, 29.127979278564453, 38.3228759765625, 31.209312438964844, 9.894403457641602, 7.5733642578125, 30.413330078125, 7.487297058105469, 30.905441284179688, 37.918731689453125, 22.456703186035156, -0.6337814331054688, 27.30727767944336, 6.367034912109375, 76.28366088867188, 37.402122497558594, 26.040712356567383, 53.181243896484375, 58.38954162597656, 34.775413513183594, 16.29100799560547, 6.2455291748046875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000385.npy"}
|
|
{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 34.01884460449219, "std": 24.481178283691406, "min": -21.662757873535156, "p10": 9.051087188720704, "median": 32.14311981201172, "p90": 64.5694076538086, "max": 109.6986083984375, "pos_frac": 0.9375, "sample": [5.101207733154297, 46.56142044067383, 9.244132995605469, 44.93727493286133, 55.8494873046875, 23.40887451171875, 11.332695007324219, 20.35014533996582, 27.734107971191406, 46.780670166015625, 45.525211334228516, 83.87257385253906, -10.942489624023438, 15.894880294799805, 48.09552001953125, 50.9073486328125, 33.094261169433594, 10.700782775878906, 18.046213150024414, -21.662757873535156, 26.467531204223633, 49.58149719238281, 36.24571990966797, 36.01458740234375, 35.68926239013672, 59.26090621948242, 18.705272674560547, 36.85129165649414, 70.87113189697266, 75.65962982177734, 8.968353271484375, 64.840087890625, 58.30778503417969, 24.51671600341797, 27.566818237304688, 68.20719909667969, 109.6986083984375, 31.191978454589844, 50.114471435546875, 13.942087173461914, 36.698394775390625, 47.43935775756836, 20.107955932617188, 34.819698333740234, 22.184295654296875, 12.573844909667969, 11.07025146484375, 56.041236877441406, 24.17987060546875, 56.55279541015625, 9.482528686523438, 8.280158996582031, 12.990985870361328, 27.493648529052734, 20.564958572387695, 36.00090789794922, 63.93782043457031, 42.47550964355469, -4.298698425292969, -8.253448486328125, 73.62967681884766, 28.850881576538086, 63.420387268066406, 13.430427551269531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000386.npy"}
|
|
{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 28.748126983642578, "std": 23.2883358001709, "min": -16.17566680908203, "p10": 0.006580543518066681, "median": 25.113113403320312, "p90": 60.574073028564456, "max": 82.43023681640625, "pos_frac": 0.890625, "sample": [26.157997131347656, 31.51342010498047, 30.060222625732422, 11.820655822753906, -6.129703521728516, 71.72737884521484, 25.61431884765625, 0.28357887268066406, 62.94512939453125, 50.81736755371094, -16.17566680908203, 47.159629821777344, 12.459943771362305, 35.73154067993164, 6.6756744384765625, 59.15904998779297, 12.86151123046875, 44.25819396972656, 36.56681823730469, 24.611907958984375, 55.31928253173828, 24.013145446777344, 37.43431854248047, 61.180511474609375, 39.27471160888672, 12.645965576171875, 16.638992309570312, 62.604549407958984, 1.6533889770507812, 23.528484344482422, 8.599540710449219, 66.43089294433594, 82.43023681640625, 16.655704498291016, 75.23129272460938, 10.597671508789062, 42.291175842285156, 22.790081024169922, 13.608451843261719, 39.345523834228516, 23.69780731201172, -0.5467100143432617, 22.864913940429688, 48.92090606689453, 4.4405517578125, 57.398468017578125, -11.768043518066406, -7.0191650390625, 58.49884033203125, -7.701118469238281, 28.47174072265625, 54.153839111328125, 10.803306579589844, 35.52936935424805, -0.11213302612304688, 48.23596954345703, 5.3072509765625, 23.76751708984375, 34.12303161621094, 12.515914916992188, 50.73431396484375, 6.288183212280273, 36.37517547607422, 24.507217407226562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000387.npy"}
|
|
{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 35.99916076660156, "std": 31.401548385620117, "min": -12.852188110351562, "p10": 4.186383819580079, "median": 27.861085891723633, "p90": 71.42238769531251, "max": 134.51425170898438, "pos_frac": 0.9375, "sample": [70.57720947265625, 63.785614013671875, 20.70694351196289, 96.7908935546875, 134.51425170898438, 102.41561126708984, 54.51678466796875, -2.3502120971679688, 25.785484313964844, 20.824081420898438, 17.88006591796875, 70.37448120117188, 37.75505065917969, 27.436803817749023, 4.1154327392578125, 15.817440032958984, 26.97265625, 6.097297668457031, 3.8893890380859375, 6.690071105957031, 39.72325134277344, 88.16993713378906, 4.351936340332031, 4.673618316650391, 37.876285552978516, 55.483768463134766, -5.629280090332031, 28.897628784179688, 23.29444122314453, 20.302825927734375, 1.0240097045898438, 24.987152099609375, 30.333847045898438, -10.842605590820312, 28.285367965698242, 24.985855102539062, 26.971786499023438, 18.907936096191406, 104.03892517089844, 7.604253768920898, 22.464385986328125, 8.861377716064453, 37.8189697265625, 32.44715118408203, 21.201736450195312, 52.13664245605469, 62.845924377441406, 48.36084747314453, 6.6973724365234375, 63.80069351196289, 64.94457244873047, 123.14068603515625, -12.852188110351562, 54.773651123046875, 29.800678253173828, 11.103515625, 17.69847869873047, 20.272907257080078, 71.78460693359375, 55.31745910644531, 28.345016479492188, 42.37770080566406, 47.46915817260742, 35.0987548828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000388.npy"}
|
|
{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 34.090003967285156, "std": 26.279804229736328, "min": -12.63510513305664, "p10": 3.3240041732788095, "median": 31.090526580810547, "p90": 65.47119293212891, "max": 134.93673706054688, "pos_frac": 0.90625, "sample": [24.974302291870117, 87.78425598144531, 46.88649368286133, -12.63510513305664, -4.147321701049805, 36.92681884765625, 46.676490783691406, 55.44178009033203, 13.707130432128906, 2.9810791015625, 20.425003051757812, -6.5674591064453125, 14.96286392211914, -3.3453140258789062, 16.57529067993164, 25.589561462402344, 10.847209930419922, 41.549713134765625, 62.94945526123047, 20.990760803222656, 23.173309326171875, 16.862075805664062, 80.10240173339844, 19.749462127685547, 20.70226287841797, 61.85906219482422, 55.9940185546875, 29.889434814453125, 43.33290100097656, 31.908987045288086, 49.400596618652344, 24.03289794921875, 38.383575439453125, 48.49738311767578, 13.959724426269531, 17.105667114257812, 48.97749328613281, 15.728652954101562, 47.39872741699219, 36.299713134765625, 69.70367431640625, 10.029386520385742, 34.22777557373047, -1.9586257934570312, 30.597000122070312, 63.34845733642578, 39.749916076660156, 16.08253288269043, 56.583438873291016, 20.598182678222656, 13.655879974365234, 71.62469482421875, 31.58405303955078, -0.10148239135742188, 44.13066482543945, 47.22727966308594, 65.70928955078125, 64.91563415527344, 69.66224670410156, 13.767498016357422, 12.805267333984375, 4.124162673950195, 42.82514190673828, 134.93673706054688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000389.npy"}
|
|
{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 31.478557586669922, "std": 28.455049514770508, "min": -24.08252716064453, "p10": -1.7472772598266597, "median": 28.54885768890381, "p90": 72.7047821044922, "max": 123.35812377929688, "pos_frac": 0.875, "sample": [29.500225067138672, 24.70431137084961, 86.58805084228516, 91.30525207519531, 6.0529022216796875, -1.961782455444336, 27.931961059570312, 23.517257690429688, 27.98322296142578, 38.77033233642578, 79.49359893798828, 27.643386840820312, 43.195526123046875, 48.49767303466797, -1.9820938110351562, 11.047943115234375, 1.3079986572265625, 123.35812377929688, 21.418563842773438, 72.14692687988281, 44.761322021484375, 19.710723876953125, 72.94386291503906, 29.745559692382812, 5.419061660766602, 32.89905548095703, 13.493036270141602, -11.802066802978516, 47.51658248901367, 24.73682403564453, -5.486427307128906, 24.310821533203125, -1.24676513671875, 25.58330535888672, 51.08293151855469, 35.096099853515625, 60.30558776855469, 86.1220932006836, -22.853984832763672, 41.92426300048828, 58.774253845214844, 30.383209228515625, 6.695274353027344, 30.969432830810547, 32.02301025390625, 32.37370300292969, 41.341697692871094, 22.466217041015625, 75.37435150146484, 47.427528381347656, 28.6802978515625, 23.372451782226562, 10.5699462890625, 33.848182678222656, 28.417417526245117, 1.8104877471923828, 13.242446899414062, -9.562566757202148, -24.08252716064453, 66.58345031738281, 7.682304382324219, 10.888004302978516, 46.827354431152344, 43.74053192138672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000390.npy"}
|
|
{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 27.683563232421875, "std": 27.29060935974121, "min": -16.2227783203125, "p10": -0.21825866699218588, "median": 23.451786041259766, "p90": 60.69417800903324, "max": 125.81134033203125, "pos_frac": 0.890625, "sample": [1.4825859069824219, 6.5814056396484375, 21.133079528808594, 12.072113037109375, -5.223175048828125, 27.53266143798828, 14.305440902709961, 32.76696014404297, 29.659564971923828, 43.582275390625, 4.4822540283203125, 51.91064453125, -4.572612762451172, 21.546096801757812, 30.161666870117188, 25.729204177856445, 16.124975204467773, 27.712448120117188, 4.131858825683594, 20.056884765625, 24.93152618408203, 67.89850616455078, 21.9720458984375, 5.701362609863281, -10.89323616027832, 30.42270278930664, 125.81134033203125, 45.647117614746094, 11.847274780273438, 51.321685791015625, 8.647651672363281, 48.580467224121094, 26.238018035888672, 37.29749298095703, -0.880523681640625, 10.287967681884766, 34.698204040527344, 37.28263854980469, 81.8049087524414, 18.693809509277344, -7.0987701416015625, 10.856193542480469, 37.19308853149414, 15.11594009399414, 105.59500122070312, 30.229080200195312, 79.689208984375, 7.51793098449707, -4.0853424072265625, 13.750244140625, 2.226472854614258, 11.093276977539062, -16.2227783203125, 46.92506790161133, 78.40999603271484, 51.30938720703125, 64.45854949951172, 35.56114196777344, 48.73771667480469, 1.3270263671875, 41.71940612792969, 41.783687591552734, 5.4937591552734375, 11.675315856933594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000391.npy"}
|
|
{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 34.45188903808594, "std": 26.532011032104492, "min": -10.12594223022461, "p10": 6.840547180175782, "median": 27.127952575683594, "p90": 66.87245788574221, "max": 130.47052001953125, "pos_frac": 0.953125, "sample": [38.99089813232422, 23.944427490234375, 7.635917663574219, 27.053024291992188, 36.333343505859375, 26.052268981933594, 27.202880859375, 12.561431884765625, 27.03125762939453, 49.79285430908203, 116.33683013916016, 11.268203735351562, 53.07954406738281, 6.578897476196289, 11.155303955078125, 15.655073165893555, 92.36026000976562, 61.644866943359375, 19.66864776611328, 19.815326690673828, 26.898880004882812, 43.65287780761719, 39.868309020996094, 30.465787887573242, 16.43408966064453, 33.44105529785156, 56.81768798828125, 76.43353271484375, 69.11285400390625, 51.80640411376953, 43.05424499511719, 31.39984130859375, 57.708961486816406, 14.484615325927734, 49.427032470703125, 22.515106201171875, 6.1472015380859375, 34.81704330444336, 4.250434875488281, 12.01639175415039, 4.238800048828125, 39.119873046875, 24.624832153320312, 23.07239532470703, 70.17625427246094, 56.28825378417969, 40.78662109375, 70.9393081665039, 45.04979705810547, -3.4470386505126953, 7.45106315612793, 43.07415771484375, -10.12594223022461, 23.029983520507812, 36.409873962402344, 17.08589744567871, 26.937599182128906, 12.037113189697266, 130.47052001953125, -0.2286376953125, 17.547435760498047, 15.168182373046875, 51.74949645996094, 58.55157470703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000392.npy"}
|
|
{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 27.648277282714844, "std": 27.87200164794922, "min": -13.310443878173828, "p10": 0.9448600769042972, "median": 19.51966094970703, "p90": 71.16518859863282, "max": 115.21476745605469, "pos_frac": 0.921875, "sample": [7.259006500244141, 58.984771728515625, 29.179630279541016, 16.692026138305664, 2.0266876220703125, 81.9947738647461, 2.9015541076660156, 75.67585754394531, 37.888946533203125, 65.33085632324219, 24.8182373046875, 32.10108947753906, 10.10993766784668, 71.83173370361328, 6.9073486328125, 12.345787048339844, 96.113525390625, 66.470703125, 28.484649658203125, -0.3929290771484375, 9.898508071899414, 0.04638671875, 5.413732528686523, 16.91130828857422, 12.592288970947266, 40.0961799621582, 1.241363525390625, 34.56043243408203, 25.352935791015625, 12.423126220703125, 31.010669708251953, 35.60798645019531, 45.30206298828125, 85.67706298828125, 9.585289001464844, 3.1200904846191406, 37.890106201171875, 18.04883575439453, 69.60991668701172, 20.184646606445312, -6.9796905517578125, 34.83650207519531, 39.22959899902344, 6.788261413574219, 35.589115142822266, 45.97320556640625, 72.57349395751953, 4.4692840576171875, 18.85467529296875, 0.8177871704101562, 24.787437438964844, 10.899200439453125, 1.3781871795654297, -13.310443878173828, 6.763629913330078, 14.892433166503906, 32.48468017578125, 4.821067810058594, 45.00506591796875, -3.5635108947753906, -12.619932174682617, 40.626007080078125, 115.21476745605469, 8.66189193725586], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000393.npy"}
|
|
{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 30.533206939697266, "std": 31.193511962890625, "min": -30.48400115966797, "p10": -3.204917144775388, "median": 20.7296199798584, "p90": 81.40430908203125, "max": 112.64308166503906, "pos_frac": 0.875, "sample": [33.24875259399414, 14.851737976074219, -30.48400115966797, 41.5777473449707, 53.97687530517578, 15.45269775390625, 41.733768463134766, 2.985614776611328, 81.11053466796875, -0.819244384765625, 77.95503234863281, -6.1803436279296875, 1.536529541015625, 20.01214599609375, -5.731040954589844, 23.72101593017578, -5.3698272705078125, 103.06338500976562, 60.391204833984375, 12.059555053710938, 46.61651611328125, 19.634300231933594, 37.597320556640625, 26.273269653320312, 16.21890640258789, 32.332191467285156, 87.494384765625, 12.265945434570312, -9.556251525878906, 84.96044921875, 57.2635498046875, 40.95445251464844, 21.149532318115234, 3.419973373413086, 13.661859512329102, 20.309707641601562, 36.990135192871094, 19.700515747070312, 9.7872314453125, 81.53021240234375, 5.1641387939453125, 38.796875, 10.55862045288086, 38.81085968017578, -16.34601593017578, 17.045166015625, 109.25804901123047, 43.48942565917969, 16.59737777709961, 14.18198013305664, 40.47022247314453, -4.227348327636719, 112.64308166503906, 88.73445129394531, 13.116943359375, 25.786270141601562, 8.118362426757812, 21.511127471923828, 15.484840393066406, 61.49372863769531, 16.46540069580078, 2.004117965698242, 32.848480224609375, 48.422698974609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000394.npy"}
|
|
{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 31.938392639160156, "std": 26.75515365600586, "min": -5.456277847290039, "p10": 1.9816175460815435, "median": 26.770893096923828, "p90": 65.15422515869142, "max": 105.67317199707031, "pos_frac": 0.921875, "sample": [5.057159423828125, 32.363094329833984, 2.6153717041015625, 5.234613418579102, 5.254478454589844, 61.56513214111328, 74.54476928710938, 48.989830017089844, -5.456277847290039, 23.863468170166016, 4.7925567626953125, 54.719120025634766, 28.302169799804688, 23.999652862548828, 5.547584533691406, 35.59504318237305, 66.73628234863281, 46.005165100097656, 41.350067138671875, 1.7100086212158203, 51.87117004394531, -1.3290863037109375, 69.97492980957031, 38.83906555175781, 4.672950744628906, 31.825302124023438, 37.21388244628906, 24.715259552001953, 1.071624755859375, 4.096076965332031, 105.67317199707031, 81.48775482177734, 38.44914245605469, 11.080286026000977, 45.89094543457031, 4.803985595703125, 25.23961639404297, 21.959182739257812, 66.19966125488281, 57.04150390625, 62.714874267578125, 46.86894226074219, -0.01117706298828125, 4.9232330322265625, 20.19454002380371, 56.78582000732422, 3.6697845458984375, 94.96143341064453, 20.243745803833008, -5.237037658691406, 10.017864227294922, -1.7574310302734375, 5.717535018920898, 56.24505615234375, 60.38393783569336, 57.699520111083984, 18.79480743408203, 41.039031982421875, 52.64457702636719, 60.47451400756836, 49.42055130004883, 6.786712646484375, 18.931419372558594, 18.97903823852539], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000395.npy"}
|
|
{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 31.344343185424805, "std": 23.985855102539062, "min": -12.659290313720703, "p10": -0.44924507141113157, "median": 28.423534393310547, "p90": 59.269508361816406, "max": 110.92547607421875, "pos_frac": 0.890625, "sample": [58.007293701171875, 33.36549377441406, 2.8144378662109375, 59.30681610107422, 32.18818664550781, 82.26206970214844, 19.021926879882812, 22.465164184570312, 25.01856231689453, 46.67694091796875, 49.324607849121094, 14.125465393066406, 42.83954620361328, 59.182456970214844, -4.385005950927734, 36.356056213378906, 27.13664436340332, 41.59953308105469, 27.625732421875, 31.10846710205078, 50.555992126464844, 50.342002868652344, 110.92547607421875, 23.425575256347656, 29.029769897460938, 18.36138916015625, 29.656234741210938, 47.97447204589844, 21.762924194335938, 35.604408264160156, -0.9662513732910156, 23.618045806884766, -9.464309692382812, 24.5382080078125, 19.351055145263672, 68.35064697265625, 36.2947998046875, 5.095085144042969, 37.45411682128906, 56.613059997558594, 18.282760620117188, 15.381683349609375, 5.1880340576171875, 27.039920806884766, 17.636123657226562, -4.528621673583984, 59.349395751953125, 46.35551452636719, 12.260225296020508, 40.50410461425781, 82.51593017578125, 28.54167938232422, 26.524261474609375, -11.441106796264648, 48.137939453125, 25.167707443237305, 31.171463012695312, 0.7571029663085938, -12.659290313720703, -5.5209503173828125, 45.23287582397461, 72.57354736328125, 24.699188232421875, 28.305389404296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000396.npy"}
|
|
{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 35.8455810546875, "std": 27.562362670898438, "min": -10.436546325683594, "p10": 3.3675262451171903, "median": 30.555654525756836, "p90": 70.97510986328125, "max": 117.25877380371094, "pos_frac": 0.90625, "sample": [17.10926055908203, 64.15936279296875, 11.571968078613281, 65.3193359375, 28.346580505371094, 25.697860717773438, 8.3441162109375, 27.87256622314453, 72.28572082519531, 20.350265502929688, 53.2572021484375, 23.852188110351562, 29.7000732421875, 13.082969665527344, 15.784698486328125, 33.605194091796875, 12.986686706542969, 22.737838745117188, 47.811126708984375, 21.800399780273438, 31.087078094482422, 87.97526550292969, 41.88496398925781, 45.50466537475586, 70.22428894042969, 6.281654357910156, 64.57192993164062, 67.72454071044922, -5.23480224609375, 80.54630279541016, 68.88040161132812, 15.127079010009766, 7.082099914550781, 83.87628173828125, 54.604862213134766, 24.325828552246094, 23.051219940185547, -6.571754455566406, 34.192832946777344, 47.986595153808594, 9.293441772460938, -5.213306427001953, 17.213359832763672, 2.1186141967773438, 71.29689025878906, 30.02423095703125, -10.436546325683594, 44.66328430175781, 33.167816162109375, 47.10126495361328, -8.799564361572266, 60.970489501953125, 53.60975646972656, 117.25877380371094, 9.941307067871094, 78.5238037109375, 34.99590301513672, -1.9806938171386719, 51.68278884887695, 32.118221282958984, 19.153846740722656, 63.675437927246094, 21.33175277709961, 63.60957336425781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000397.npy"}
|
|
{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 31.041210174560547, "std": 26.031597137451172, "min": -10.649391174316406, "p10": 5.640168571472168, "median": 25.984329223632812, "p90": 58.289406585693364, "max": 129.79881286621094, "pos_frac": 0.9375, "sample": [16.49584197998047, 13.253646850585938, 58.973541259765625, 30.624221801757812, 37.02827072143555, 33.44788360595703, 29.31238555908203, 9.322998046875, 14.471969604492188, 2.8225173950195312, 46.02305603027344, 5.498748779296875, 52.16575622558594, 30.161258697509766, 44.72905731201172, 129.79881286621094, 27.76569366455078, 34.41460418701172, 76.85073852539062, 45.922523498535156, 42.523834228515625, 25.75328254699707, 35.68632507324219, 25.97020721435547, 59.0455322265625, 21.6087646484375, 25.029205322265625, 12.455947875976562, 17.728836059570312, 21.4195556640625, 31.502769470214844, 55.53115463256836, 22.564773559570312, 5.365673065185547, -2.270355224609375, 9.16986083984375, 25.998451232910156, 11.440032958984375, 52.309776306152344, 12.158302307128906, 38.984867095947266, 44.92149353027344, 34.49391174316406, -2.9422607421875, 27.611602783203125, 10.136444091796875, 24.307262420654297, 14.821990966796875, 18.292381286621094, 42.33453369140625, 5.970148086547852, 62.820831298828125, 56.693092346191406, 117.31784057617188, -10.649391174316406, 43.42835998535156, 97.27047729492188, 23.84716033935547, 18.15673828125, 15.828731536865234, 28.164443969726562, 22.130035400390625, 11.447196960449219, -8.825965881347656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000398.npy"}
|
|
{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 34.47357940673828, "std": 25.490554809570312, "min": -16.744308471679688, "p10": 1.2411916732788086, "median": 34.38950729370117, "p90": 66.3838592529297, "max": 87.28109741210938, "pos_frac": 0.90625, "sample": [45.013458251953125, 26.70099639892578, 14.469768524169922, 41.3668212890625, 45.0091667175293, -11.990373611450195, 64.82896423339844, 35.58230209350586, 31.715553283691406, 27.80877685546875, 34.18016815185547, 14.773029327392578, 42.212013244628906, -5.31158447265625, 51.83740997314453, 16.451709747314453, 57.37797546386719, 9.347766876220703, 24.530977249145508, 15.763286590576172, 8.886627197265625, 11.771244049072266, 47.39381408691406, 25.92177391052246, 62.971099853515625, 76.5767822265625, 34.32634735107422, 51.03309631347656, -16.744308471679688, 33.41638946533203, 44.9998779296875, 84.13427734375, 53.804443359375, 23.34481430053711, 46.028228759765625, -12.916267395019531, 87.28109741210938, 49.651092529296875, 64.1455307006836, 39.22663879394531, 14.159423828125, 63.207916259765625, 47.04554748535156, -13.745574951171875, 11.933422088623047, 71.48591613769531, 1.3643360137939453, 36.405670166015625, 68.1525650024414, 5.495647430419922, 32.80238342285156, 57.2689208984375, 34.452667236328125, 55.61100769042969, 13.666271209716797, 66.6949462890625, 17.3411865234375, 60.52854537963867, 1.18841552734375, 67.57290649414062, 65.65798950195312, -0.3248138427734375, 18.249805450439453, 13.17314338684082], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000399.npy"}
|
|
{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 38.233909606933594, "std": 31.20196533203125, "min": -27.43792724609375, "p10": 0.9593193054199235, "median": 34.80133819580078, "p90": 74.66145629882813, "max": 136.8827667236328, "pos_frac": 0.90625, "sample": [38.01641845703125, 47.834083557128906, 28.656784057617188, 40.635467529296875, 66.1990966796875, 33.87419128417969, 15.921417236328125, 54.39481735229492, 14.084983825683594, 20.65848159790039, 52.71154022216797, 19.77028465270996, 34.796478271484375, 34.80619812011719, -23.402572631835938, 39.434051513671875, 71.14082336425781, 9.929605484008789, 24.897506713867188, 44.541011810302734, 99.33673858642578, 65.17190551757812, 44.69496154785156, 75.50550842285156, 3.7217254638671875, 47.58122253417969, 40.40941619873047, 56.199974060058594, 15.777786254882812, 79.03117370605469, -1.7084217071533203, -9.100242614746094, -9.846611022949219, 21.740028381347656, 34.62995910644531, 22.602920532226562, 10.664405822753906, 85.48161315917969, 27.501129150390625, 43.33868408203125, 0.25733184814453125, 19.342578887939453, 37.865116119384766, 108.80321502685547, -27.43792724609375, 70.94215393066406, 28.961082458496094, 48.371681213378906, 31.381732940673828, 136.8827667236328, 30.4029541015625, 6.9068145751953125, 23.55738067626953, 52.67003631591797, 27.416725158691406, 72.69200134277344, -1.7286834716796875, 61.16206359863281, 62.56036376953125, 92.57330322265625, 2.5972900390625, 62.354461669921875, 63.849761962890625, 12.951560974121094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000400.npy"}
|
|
{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 29.519914627075195, "std": 31.654132843017578, "min": -32.24431610107422, "p10": -6.396365547180175, "median": 23.015088081359863, "p90": 82.00961303710938, "max": 126.46450805664062, "pos_frac": 0.84375, "sample": [-4.726291656494141, 25.981292724609375, 15.584705352783203, 35.40656280517578, 35.59173583984375, 23.21970558166504, -9.479421615600586, -32.24431610107422, -10.103378295898438, -6.587162017822266, 8.203849792480469, -2.874908447265625, 21.03644561767578, 12.690423965454102, 8.4990234375, 3.258268356323242, 45.04087829589844, 56.36077880859375, 4.5262298583984375, 43.16412353515625, 0.101043701171875, -7.475456237792969, 42.346839904785156, 51.01470947265625, 14.330007553100586, 83.33003234863281, 68.90257263183594, 46.42626953125, 126.46450805664062, 16.574337005615234, 25.868698120117188, 13.840240478515625, 12.416725158691406, 24.84930419921875, -7.573551177978516, 8.409561157226562, 88.48533630371094, 3.269115447998047, 52.08032989501953, 37.21656799316406, 48.840911865234375, 57.009925842285156, 24.002273559570312, 3.45233154296875, 22.810470581054688, 18.550636291503906, 29.541854858398438, 20.69280242919922, 82.48641967773438, 18.285385131835938, 80.89706420898438, 40.6295166015625, 39.08957290649414, 23.28365135192871, 89.86065673828125, 90.23876953125, 8.763837814331055, 19.410554885864258, 51.887733459472656, -5.951173782348633, 62.35699462890625, 94.34915924072266, 10.123222351074219, -14.763811111450195], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000401.npy"}
|
|
{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 38.97754669189453, "std": 23.835172653198242, "min": -10.785377502441406, "p10": 11.025777435302738, "median": 37.254255294799805, "p90": 70.41006546020509, "max": 104.41378021240234, "pos_frac": 0.96875, "sample": [57.93374252319336, 17.719785690307617, 9.978828430175781, 104.41378021240234, 29.621353149414062, 52.95418930053711, 7.3269195556640625, 13.959846496582031, 16.953630447387695, 25.987228393554688, 72.03309631347656, 68.4177474975586, 91.15001678466797, 22.096385955810547, 16.672698974609375, 42.485809326171875, 6.867546081542969, 42.906227111816406, 37.62982177734375, 42.163352966308594, 54.38030242919922, 7.2485809326171875, 62.258419036865234, -1.6487808227539062, 40.46867370605469, 61.302452087402344, 26.555023193359375, 95.6226806640625, 19.99273681640625, 20.803115844726562, 42.59588623046875, 22.82845687866211, 26.997509002685547, 26.531841278076172, 35.80378723144531, 32.262413024902344, 58.952171325683594, 46.51579284667969, 37.26004409790039, 84.92247772216797, 13.468658447265625, 6.218149185180664, 53.777610778808594, 74.16645812988281, 28.94955062866211, 22.88601303100586, 45.937835693359375, 49.14250946044922, 25.527034759521484, 34.515228271484375, 45.86112976074219, 71.263916015625, 44.22344207763672, 63.18605041503906, 17.367095947265625, 49.0133056640625, 50.409908294677734, 49.3880615234375, 37.24846649169922, -10.785377502441406, 26.933067321777344, 15.417465209960938, 37.05439758300781, 62.467498779296875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000402.npy"}
|
|
{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 30.17224884033203, "std": 28.46843719482422, "min": -40.982303619384766, "p10": -2.803588485717772, "median": 25.07079792022705, "p90": 72.03259353637695, "max": 105.57167053222656, "pos_frac": 0.859375, "sample": [21.57964324951172, 31.637351989746094, 7.486745834350586, 43.45537567138672, 29.680511474609375, 1.3486766815185547, 30.462020874023438, 18.53276824951172, 3.294342041015625, 8.489641189575195, 21.0322265625, -1.1534423828125, -4.401519775390625, 63.063899993896484, 1.2104034423828125, 20.717750549316406, 22.21518325805664, 13.527055740356445, 87.17294311523438, 54.933868408203125, 38.07402038574219, 19.532936096191406, 72.06295776367188, 7.002616882324219, -10.056922912597656, 89.31622314453125, 63.195587158203125, 23.075271606445312, 33.32809829711914, 72.02734375, -3.405590057373047, 58.168212890625, 105.57167053222656, 30.47555160522461, 24.517738342285156, 48.72211456298828, -4.73210334777832, 36.643768310546875, 73.96984100341797, 72.03484344482422, -40.982303619384766, 25.58545684814453, 50.274871826171875, -1.3989181518554688, 67.24854278564453, 9.894105911254883, 91.16848754882812, 35.362274169921875, 19.760597229003906, 34.20890808105469, 56.894107818603516, 24.55613899230957, 32.10502624511719, -8.179634094238281, 23.891515731811523, 17.040985107421875, 8.723979949951172, 30.50506591796875, 17.635360717773438, 40.607364654541016, 35.580162048339844, -9.906288146972656, 18.34564208984375, 28.292760848999023], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000403.npy"}
|
|
{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 30.536544799804688, "std": 28.906705856323242, "min": -27.0700626373291, "p10": -2.543499755859374, "median": 25.928820610046387, "p90": 70.06002960205079, "max": 102.101806640625, "pos_frac": 0.859375, "sample": [21.788414001464844, 18.977840423583984, 26.48411750793457, 102.101806640625, 19.981544494628906, 38.75306701660156, 16.930198669433594, 67.877197265625, 22.318683624267578, 88.39945983886719, 60.05457305908203, 10.946304321289062, 10.045459747314453, 65.16041564941406, 12.537368774414062, 6.06512451171875, 70.99552917480469, 39.63645935058594, -1.1295795440673828, 22.484601974487305, -20.058063507080078, 18.194854736328125, 35.652000427246094, 53.592071533203125, 53.49562072753906, 44.78600311279297, -27.0700626373291, 45.71161651611328, 40.936607360839844, 7.2691802978515625, -4.077791213989258, 7.043928146362305, 67.58833312988281, 46.86372375488281, 29.672149658203125, 0.8863849639892578, -7.06060791015625, 36.78407287597656, 60.82307434082031, 9.912099838256836, -1.7175369262695312, 24.907989501953125, 34.58340835571289, 79.55987548828125, 3.2912025451660156, -2.924560546875, 1.308675765991211, 25.373523712158203, 35.2562255859375, -3.9129257202148438, -2.8974838256835938, 36.08814239501953, 27.679683685302734, 11.874168395996094, 64.60140228271484, 90.15431213378906, 39.265235900878906, 9.829269409179688, 7.216840744018555, 2.582571029663086, 87.93243408203125, 42.549842834472656, 45.731201171875, 74.6517105102539], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000404.npy"}
|
|
{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 30.217140197753906, "std": 29.265989303588867, "min": -16.990333557128906, "p10": -1.7223726272583004, "median": 22.36363124847412, "p90": 71.99656982421875, "max": 116.83235168457031, "pos_frac": 0.875, "sample": [41.95573425292969, 65.00527954101562, 40.133384704589844, 22.71546745300293, 15.293357849121094, 8.6033935546875, -11.379104614257812, 13.5943603515625, 11.804647445678711, -2.67254638671875, 29.935422897338867, 51.993743896484375, 104.1762924194336, -16.990333557128906, 11.934484481811523, 79.88191223144531, 2.877836227416992, 17.306438446044922, 45.197044372558594, 0.8264923095703125, 36.199378967285156, 26.565536499023438, 17.20263671875, 50.640655517578125, 13.562417984008789, 23.79791831970215, 42.143287658691406, 18.266128540039062, 4.012054443359375, -4.369951248168945, 55.41984939575195, -1.8853130340576172, 17.69534683227539, 22.078290939331055, 50.686767578125, 14.851739883422852, 20.361495971679688, -1.3421783447265625, 2.243732452392578, 10.867412567138672, 71.8233642578125, 72.07080078125, 80.56375122070312, 47.404624938964844, 28.081165313720703, 45.36992645263672, 76.46755981445312, 14.936241149902344, 11.412841796875, 29.756378173828125, 44.806724548339844, 9.528907775878906, -13.310178756713867, 84.0216064453125, 12.135169982910156, 4.27252197265625, 43.52215576171875, 13.58917236328125, 56.34941864013672, 116.83235168457031, 54.23454284667969, -12.637351989746094, 22.648971557617188, 68.82582092285156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000405.npy"}
|
|
{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 31.772830963134766, "std": 28.3826961517334, "min": -30.908905029296875, "p10": 0.22066898345947494, "median": 28.54346466064453, "p90": 71.41479873657227, "max": 109.26414489746094, "pos_frac": 0.890625, "sample": [27.261062622070312, 9.227230072021484, 14.169792175292969, 46.37713623046875, 53.458343505859375, 13.674644470214844, 24.342018127441406, 14.660221099853516, 109.26414489746094, 28.652748107910156, 60.2286491394043, -0.7277069091796875, 86.63409423828125, 34.9232177734375, 44.52937316894531, 23.57010269165039, 35.179283142089844, 67.44679260253906, 53.38470458984375, -2.473630905151367, 59.199501037597656, 56.338768005371094, 43.766571044921875, -30.908905029296875, 36.44390869140625, 15.276260375976562, 12.97572135925293, -4.370006561279297, 40.222869873046875, -6.263710021972656, 5.918800354003906, 38.999759674072266, 22.969329833984375, -29.33819580078125, 3.8986854553222656, 72.42234802246094, 60.59974670410156, 14.263656616210938, 26.64250373840332, -1.9271278381347656, 32.458953857421875, 50.69207763671875, 81.67809295654297, 7.931388854980469, 15.255508422851562, 10.209770202636719, 72.0174560546875, 49.09931182861328, 8.403656005859375, 12.4459228515625, 70.00859832763672, 79.70359802246094, 15.021980285644531, 6.37274169921875, 2.857421875, 2.4335460662841797, 28.434181213378906, 30.416091918945312, 75.34587097167969, 50.523101806640625, 64.32339477539062, 43.24835205078125, 29.391643524169922, 14.275829315185547], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000406.npy"}
|
|
{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 32.281612396240234, "std": 29.783952713012695, "min": -30.256179809570312, "p10": -5.100197982788085, "median": 28.05343246459961, "p90": 72.55829315185548, "max": 95.6281509399414, "pos_frac": 0.859375, "sample": [12.659818649291992, 71.18534851074219, 10.070938110351562, -10.356599807739258, -27.944355010986328, 28.154502868652344, 43.900474548339844, -4.030189514160156, 53.66778564453125, -5.558773040771484, 59.191802978515625, 30.000396728515625, 47.36054229736328, 81.07332611083984, 85.27397155761719, 2.601896286010742, 44.93686294555664, 58.18592071533203, 24.937705993652344, 22.60515594482422, 27.952362060546875, -5.8556976318359375, 21.769577026367188, 73.14669799804688, 66.34808349609375, 20.01580810546875, 61.763668060302734, 11.916072845458984, 61.93296813964844, 41.529258728027344, 5.499641418457031, 74.45133972167969, 21.879379272460938, 14.452491760253906, 17.747413635253906, 27.374038696289062, 36.265228271484375, 93.19363403320312, 38.511287689208984, -30.256179809570312, 37.44026184082031, 35.14104080200195, 9.509765625, 40.24512481689453, 55.94270324707031, 21.429367065429688, 7.839935302734375, 54.35418701171875, 36.14753723144531, 16.279495239257812, 95.6281509399414, -8.033729553222656, 10.43936538696289, 67.4822998046875, -0.4025421142578125, 6.97429084777832, 1.746572494506836, 65.51136016845703, 3.7097244262695312, 20.597198486328125, 82.79144287109375, 54.30426025390625, -9.627349853515625, 53.01921081542969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000407.npy"}
|
|
{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 27.853038787841797, "std": 27.439891815185547, "min": -19.864295959472656, "p10": -3.3349412918090815, "median": 26.381354331970215, "p90": 52.96138687133789, "max": 113.87642669677734, "pos_frac": 0.84375, "sample": [11.825660705566406, 74.95079040527344, 87.21572875976562, 23.88011932373047, 22.166885375976562, 47.14399719238281, 6.037200927734375, 29.608671188354492, 34.83769989013672, -9.978996276855469, 22.70207405090332, 7.6414337158203125, -10.475929260253906, 31.80609130859375, -19.864295959472656, -1.24774169921875, 38.29017639160156, 42.99058532714844, 23.304122924804688, 28.88258934020996, 23.450115203857422, 41.53047180175781, 52.90687561035156, 52.98474884033203, -11.006210327148438, 12.811775207519531, -3.6280879974365234, 6.3918914794921875, 21.81446075439453, 53.979156494140625, -16.37364387512207, 4.980255126953125, 49.303375244140625, 4.578550338745117, -10.779857635498047, 5.0823974609375, 35.96959686279297, 39.91542053222656, 20.596694946289062, 45.36190414428711, 49.1304931640625, 14.983306884765625, 34.10919189453125, 48.4937744140625, 36.27521514892578, 1.4481048583984375, 50.69633483886719, 11.107961654663086, -2.6509323120117188, 8.88690185546875, 4.259389877319336, 93.51812744140625, -0.22498321533203125, 5.010551452636719, 52.80842590332031, 40.527618408203125, 7.748847961425781, 31.75464630126953, 52.29515075683594, 43.96965026855469, 113.87642669677734, 83.56947326660156, 41.017242431640625, 38.396827697753906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000408.npy"}
|
|
{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 30.92287254333496, "std": 25.655141830444336, "min": -12.047889709472656, "p10": 1.1068693161010745, "median": 25.957639694213867, "p90": 69.23589859008788, "max": 100.93586730957031, "pos_frac": 0.921875, "sample": [-0.903961181640625, -12.047889709472656, 57.195350646972656, 25.089075088500977, 46.191123962402344, 54.953460693359375, 24.2464656829834, 40.9683837890625, 25.070541381835938, 27.765762329101562, 68.97527313232422, 32.566795349121094, 0.9565505981445312, 43.71421813964844, 76.88237762451172, 11.186752319335938, 11.236373901367188, 24.429866790771484, 34.181514739990234, 67.75131225585938, 27.672470092773438, -9.172370910644531, 73.50482940673828, 5.661174774169922, 69.34759521484375, 3.1330108642578125, 15.289093017578125, 16.049083709716797, 9.512672424316406, 31.546512603759766, 17.560195922851562, 84.27442169189453, 50.68901062011719, 17.100929260253906, 19.653711318969727, 83.83563232421875, -5.219871520996094, 32.5244026184082, -2.9642257690429688, 14.039405822753906, 28.219581604003906, 4.394523620605469, 38.1617431640625, 10.481632232666016, 27.26221466064453, 6.636451721191406, 26.826204299926758, 35.004371643066406, 65.25865936279297, 36.70028305053711, 14.624746322631836, 35.035804748535156, 1.4576129913330078, 19.585651397705078, 54.16223907470703, 80.9187240600586, 50.11239242553711, 46.063201904296875, 22.254989624023438, 17.496017456054688, 100.93586730957031, 0.6579742431640625, 21.92791748046875, 20.43799591064453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000409.npy"}
|
|
{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 27.859020233154297, "std": 26.466480255126953, "min": -19.219940185546875, "p10": -4.101694488525388, "median": 27.212435722351074, "p90": 69.64659042358399, "max": 89.92950439453125, "pos_frac": 0.84375, "sample": [55.063201904296875, 20.1373291015625, -1.9379653930664062, 1.4694442749023438, 39.11326599121094, 32.23569869995117, 37.481689453125, 70.62752532958984, 42.4979362487793, 6.023384094238281, 15.98097038269043, 17.43511199951172, 9.1295166015625, 53.370208740234375, 2.0526065826416016, 7.958049774169922, 76.80140686035156, -10.993186950683594, -5.639339447021484, 58.027427673339844, 62.38029098510742, 89.92950439453125, 24.244518280029297, 34.52088928222656, 43.302833557128906, 33.24205017089844, -5.516315460205078, -13.517887115478516, 67.35774230957031, 79.80035400390625, 17.435806274414062, -0.7150363922119141, 34.522361755371094, 11.298171997070312, 39.577239990234375, 8.688583374023438, -11.11085319519043, -0.6930675506591797, 7.893524169921875, 6.706695556640625, 5.8717193603515625, 38.695552825927734, 30.84099578857422, 72.77510070800781, 28.742942810058594, 4.4981536865234375, 40.48541259765625, 9.972763061523438, 76.81851196289062, 40.78105163574219, 35.33000183105469, 4.649892807006836, 28.83087158203125, 41.83720397949219, 36.95587921142578, 61.111053466796875, 25.681928634643555, 44.0796012878418, -19.219940185546875, -5.0290069580078125, 24.60045623779297, 11.505407333374023, 71.11582946777344, 15.864158630371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000410.npy"}
|
|
{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 29.69510841369629, "std": 28.803665161132812, "min": -16.01111602783203, "p10": -2.8065376281738277, "median": 25.502796173095703, "p90": 65.8371192932129, "max": 123.35232543945312, "pos_frac": 0.859375, "sample": [34.397151947021484, 25.111141204833984, 19.200714111328125, -11.102783203125, -2.194000244140625, 5.282562255859375, 2.3358726501464844, 28.82728385925293, 25.327667236328125, 40.69416809082031, 23.853790283203125, 14.22700309753418, 66.6142807006836, 5.341320037841797, 12.238122940063477, 21.902772903442383, 11.11216926574707, 73.4620361328125, 25.83400535583496, 60.79212188720703, 106.57960510253906, 54.47132110595703, -3.0690536499023438, -5.2169189453125, -16.01111602783203, 33.07087707519531, 42.86039733886719, -5.7153472900390625, 20.363327026367188, 81.743408203125, 8.99505615234375, 53.28717803955078, 35.79425048828125, 15.392841339111328, 48.64830017089844, -2.093313217163086, 17.18950653076172, 64.02374267578125, 48.37845993041992, 28.439315795898438, 17.959524154663086, 48.02188491821289, 25.67792510986328, 53.018653869628906, 34.098289489746094, 31.908645629882812, 27.56789779663086, 4.79241943359375, 123.35232543945312, 47.010902404785156, 9.954387664794922, 75.68875885009766, 51.23461151123047, 93.00119018554688, 47.92247772216797, -13.521247863769531, 0.685150146484375, 27.76214599609375, 23.218116760253906, 7.778871536254883, 40.18870544433594, 2.30255126953125, 14.220108032226562, -3.746593475341797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000411.npy"}
|
|
{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 29.459095001220703, "std": 27.281654357910156, "min": -6.75126838684082, "p10": -2.548849487304687, "median": 24.636157989501953, "p90": 71.17525711059572, "max": 100.99809265136719, "pos_frac": 0.859375, "sample": [-3.1637821197509766, 57.384090423583984, 49.85974884033203, 32.38105773925781, 76.92167663574219, 41.22051239013672, 51.488189697265625, 24.285865783691406, 13.1798095703125, 84.98214721679688, 4.601863861083984, 7.4692535400390625, 15.553081512451172, 38.06389617919922, 5.508459091186523, 38.182037353515625, 34.78936004638672, -4.467350006103516, 17.33245849609375, 25.11773681640625, 35.439483642578125, 10.436973571777344, 34.432579040527344, 25.407377243041992, 15.301544189453125, 5.992885589599609, 1.6470184326171875, 21.615238189697266, -4.1508636474609375, 47.400657653808594, 8.745723724365234, 10.290050506591797, 31.819374084472656, 9.008804321289062, 41.824241638183594, 12.031820297241211, -2.0536155700683594, 16.421512603759766, 10.450614929199219, 78.6925048828125, 45.2041015625, -3.5255661010742188, 86.07713317871094, 41.16911315917969, 62.12083435058594, 22.521167755126953, 6.0428924560546875, 62.729034423828125, -2.2454833984375, 60.64353942871094, 64.82088470458984, 1.4386882781982422, 5.474090576171875, 12.333221435546875, 73.8985595703125, 100.99809265136719, -5.748750686645508, 50.487030029296875, 82.68061828613281, -6.75126838684082, 24.9864501953125, -2.678863525390625, 47.633331298828125, 33.62928771972656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000412.npy"}
|
|
{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 29.902385711669922, "std": 25.80487823486328, "min": -32.733367919921875, "p10": 3.6720817565917985, "median": 25.96346092224121, "p90": 59.09532852172852, "max": 104.16268920898438, "pos_frac": 0.921875, "sample": [14.962799072265625, 0.8230705261230469, 24.303251266479492, 11.51988410949707, 54.80276870727539, -14.410873413085938, 2.9112625122070312, 26.736412048339844, 17.003868103027344, 17.393417358398438, 32.84463882446289, 59.16199493408203, 18.014686584472656, 55.72997283935547, 10.500396728515625, 17.594533920288086, 25.48284149169922, 6.255039215087891, 7.674041748046875, 104.16268920898438, 6.3448486328125, 58.93977355957031, 31.586944580078125, -32.733367919921875, 5.44732666015625, 38.94769287109375, 17.458953857421875, 16.071884155273438, 24.923721313476562, 78.96601867675781, 55.76893615722656, 24.56754493713379, 46.8780517578125, 39.3824462890625, 11.14422607421875, 78.67066955566406, 62.338104248046875, -5.71624755859375, 30.14366912841797, 88.08782958984375, 40.51762390136719, 37.27278137207031, 49.48797607421875, 38.49656677246094, 27.391525268554688, 54.16227722167969, 54.56922912597656, -7.9629974365234375, -7.1189727783203125, 29.035919189453125, 26.444080352783203, 37.25334167480469, 55.36723327636719, 12.580436706542969, 5.50651741027832, 31.76530647277832, 11.381637573242188, 83.92681884765625, 19.121479034423828, 15.139053344726562, 38.76109313964844, 48.793277740478516, 20.970977783203125, 20.203838348388672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000413.npy"}
|
|
{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 35.684043884277344, "std": 27.852243423461914, "min": -11.586601257324219, "p10": 3.590279960632325, "median": 30.399629592895508, "p90": 73.42118606567384, "max": 112.14984130859375, "pos_frac": 0.9375, "sample": [53.09192657470703, 37.421791076660156, 44.110660552978516, 6.309566497802734, 16.956195831298828, 38.41847229003906, 3.3405532836914062, -10.854873657226562, 12.808975219726562, 33.431060791015625, 25.993820190429688, 57.285675048828125, 72.62830352783203, 73.01123809814453, 19.003021240234375, 13.415506362915039, 36.502525329589844, 21.669448852539062, 4.172975540161133, 11.363779067993164, 30.147991180419922, 39.80980682373047, 17.901187896728516, 29.660953521728516, 28.53173828125, 52.59858703613281, 75.86659240722656, 19.885297775268555, 47.38859558105469, 55.90234375, 82.40850830078125, 41.924217224121094, 7.284400939941406, 21.51592254638672, 89.61898040771484, 13.16853141784668, 26.469833374023438, -11.586601257324219, -10.493759155273438, 18.186595916748047, 13.001358032226562, 54.119232177734375, 64.75568389892578, -3.0142574310302734, 2.0490036010742188, 112.14984130859375, 26.179229736328125, 64.02210998535156, 9.114620208740234, 40.19041061401367, 37.964752197265625, 57.538238525390625, 30.651268005371094, 40.55023193359375, 20.181659698486328, 40.6112060546875, 81.57476806640625, 2.3740997314453125, 61.0936279296875, 73.59687805175781, 107.20256042480469, 15.246021270751953, 29.96131134033203, 56.39459228515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000414.npy"}
|
|
{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 29.6812686920166, "std": 24.819744110107422, "min": -5.43707275390625, "p10": 2.4643175125122077, "median": 24.780954360961914, "p90": 58.76277389526368, "max": 114.40155029296875, "pos_frac": 0.9375, "sample": [61.66258239746094, 10.4290771484375, 54.750701904296875, 86.66181945800781, 6.07513427734375, 40.06243133544922, 34.31285095214844, 21.001361846923828, 26.749309539794922, 53.59716796875, 55.02959442138672, 39.672996520996094, 47.405189514160156, 58.034217834472656, 36.05120849609375, 10.88400650024414, 29.930503845214844, 4.49029541015625, 20.628036499023438, 7.8242950439453125, 5.259130477905273, 45.27471160888672, 11.719329833984375, 59.07501220703125, 18.59735870361328, 31.71853256225586, 74.86827087402344, -1.2668533325195312, 27.91796875, 38.59765625, 8.803253173828125, 12.52187728881836, 17.7017822265625, 0.6282806396484375, 114.40155029296875, 0.7968177795410156, -0.7562122344970703, 44.54103088378906, 44.599388122558594, 51.963653564453125, 4.244747161865234, 19.594959259033203, 2.2620162963867188, 14.235885620117188, 33.144901275634766, 40.043304443359375, 8.376775741577148, 14.272842407226562, 48.40093231201172, 23.458438873291016, -1.229583740234375, 54.99388122558594, 19.987985610961914, 26.103469848632812, 12.096076965332031, 2.9363536834716797, 12.094093322753906, -5.43707275390625, 86.0145492553711, 12.49755859375, 64.58485412597656, 44.619998931884766, 11.410346984863281, 38.67854309082031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000415.npy"}
|
|
{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 36.515438079833984, "std": 28.901609420776367, "min": -14.394332885742188, "p10": 1.2617874145507826, "median": 35.503929138183594, "p90": 72.17559127807617, "max": 114.41659545898438, "pos_frac": 0.921875, "sample": [37.108642578125, 12.880256652832031, 12.57366943359375, 67.62939453125, -14.394332885742188, 95.14813232421875, 32.80998229980469, 18.495643615722656, 50.648773193359375, 95.72159576416016, 37.86481475830078, 35.933937072753906, 37.771331787109375, 26.464717864990234, 54.214508056640625, 34.245582580566406, 11.132181167602539, -7.020683288574219, 72.41679382324219, 21.808696746826172, 35.87364196777344, 18.922767639160156, 9.656322479248047, 71.24767303466797, 71.61278533935547, 35.13421630859375, 45.168434143066406, 65.25686645507812, 0.3701934814453125, 43.8363037109375, 17.059112548828125, 36.851356506347656, 114.41659545898438, 4.2434234619140625, -2.9201316833496094, -10.003194808959961, 48.993553161621094, 65.65914916992188, 28.115074157714844, 23.692718505859375, 30.789993286132812, 109.18429565429688, 48.17771911621094, 18.352279663085938, 38.5374755859375, 41.1043586730957, 33.298057556152344, 77.9469985961914, 23.14158058166504, 17.190296173095703, 0.7447776794433594, 36.02497863769531, 8.3121337890625, 76.59806823730469, 20.529420852661133, -12.65887451171875, 65.06558227539062, 2.4681434631347656, 37.90983581542969, 23.373184204101562, 14.56120491027832, 45.46617889404297, 71.50856018066406, 52.72111511230469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000416.npy"}
|
|
{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 34.968475341796875, "std": 23.686065673828125, "min": -4.384225845336914, "p10": 3.187281608581546, "median": 37.111473083496094, "p90": 64.36595497131349, "max": 85.22840881347656, "pos_frac": 0.921875, "sample": [31.87544822692871, 36.15294647216797, 0.4079113006591797, 12.837432861328125, -0.4736976623535156, 39.12312316894531, 25.58349609375, 6.535272598266602, 9.654790878295898, 82.25772857666016, 25.815425872802734, 49.2850341796875, 14.381725311279297, 17.61892318725586, 47.21746826171875, 16.269065856933594, -4.384225845336914, 52.49406433105469, 54.773773193359375, 28.743377685546875, 49.14253234863281, 17.14093017578125, 39.573219299316406, 53.96482849121094, 53.01729965209961, 50.51478576660156, 56.13174819946289, 18.47240447998047, 64.69112396240234, 53.80946350097656, 6.293083190917969, 12.870330810546875, 16.343393325805664, 19.338523864746094, 36.25580596923828, 51.57643127441406, 63.0301628112793, 1.8842945098876953, 59.352203369140625, 37.967140197753906, -0.5571384429931641, 63.60722732543945, 42.25115966796875, 49.70534133911133, -3.7153778076171875, 75.88402557373047, 78.61431121826172, 10.92338752746582, 38.756805419921875, 40.653961181640625, 74.80122375488281, -2.2751731872558594, 46.48005294799805, 18.008567810058594, 31.269031524658203, 22.91986083984375, 6.2275848388671875, 49.726524353027344, 85.22840881347656, 70.64205169677734, 39.5189208984375, 47.461029052734375, 34.78584289550781, 9.526054382324219], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000417.npy"}
|
|
{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 34.140960693359375, "std": 32.01416778564453, "min": -41.79039001464844, "p10": 0.5435695648193363, "median": 29.066360473632812, "p90": 76.29382858276374, "max": 127.02761840820312, "pos_frac": 0.90625, "sample": [53.718238830566406, 1.2579498291015625, 46.1069450378418, 4.458225250244141, 26.494049072265625, 106.06590270996094, 4.126106262207031, 14.600055694580078, 48.31036376953125, 44.44011306762695, 32.56878662109375, -14.879035949707031, 59.65830993652344, 30.97515869140625, 91.20783996582031, 56.57200622558594, 107.19198608398438, -20.044174194335938, 26.764312744140625, 11.378128051757812, 10.972061157226562, 92.774658203125, 21.18340301513672, 18.572021484375, 0.4018974304199219, 22.31818389892578, 56.275909423828125, 52.46240234375, 27.157562255859375, 9.617408752441406, 54.619384765625, 15.133926391601562, 34.620521545410156, 38.489349365234375, 12.74359130859375, 20.032203674316406, -4.324743270874023, 42.595577239990234, 15.413162231445312, 60.805030822753906, 127.02761840820312, -6.578338623046875, 24.847938537597656, 47.85101318359375, 41.52225875854492, 51.27416229248047, 24.181861877441406, 44.204994201660156, 1.6973876953125, -41.79039001464844, 17.235763549804688, 44.51554870605469, 48.64166259765625, 0.8741378784179688, 52.637176513671875, 16.6455078125, 18.149253845214844, 93.77346801757812, 9.393318176269531, -2.572986602783203, 52.503021240234375, 82.931884765625, 48.57353210449219, 54.646934509277344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000418.npy"}
|
|
{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 33.94792938232422, "std": 24.165023803710938, "min": -14.420141220092773, "p10": 8.107709503173828, "median": 29.13538932800293, "p90": 65.22945251464844, "max": 97.59650421142578, "pos_frac": 0.953125, "sample": [53.706878662109375, -14.420141220092773, 40.1512451171875, 41.289146423339844, 56.43782043457031, 22.199840545654297, -3.0291404724121094, -2.8379249572753906, 3.093170166015625, 62.5048828125, 28.410842895507812, 8.766677856445312, 60.97093200683594, 46.578094482421875, 29.27573585510254, 32.68937301635742, 31.307960510253906, 24.905086517333984, 16.87329864501953, 38.85762023925781, 31.965763092041016, 55.48077392578125, 8.521759033203125, 46.515357971191406, 12.57843017578125, 7.930259704589844, 14.718399047851562, 22.428110122680664, 85.68733215332031, 72.27587890625, 44.15370178222656, 3.2654056549072266, 49.41323471069336, 13.904624938964844, 13.084854125976562, 34.805152893066406, 26.924118041992188, 54.349395751953125, 35.926536560058594, 16.4901123046875, 72.89852905273438, 27.874282836914062, 32.828125, 18.667003631591797, 12.628835678100586, 66.39712524414062, 28.99504280090332, 45.370208740234375, 97.59650421142578, 56.028785705566406, 78.16278076171875, 12.318046569824219, 10.519859313964844, 92.16516876220703, 59.20139694213867, 57.0279541015625, 26.37657928466797, 6.868705749511719, 10.415847778320312, 40.30818176269531, 16.68450927734375, 28.357681274414062, 27.16071319580078, 19.665084838867188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000419.npy"}
|
|
{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 32.95982360839844, "std": 27.747848510742188, "min": -24.143829345703125, "p10": 3.983855056762696, "median": 28.494515419006348, "p90": 71.20892486572265, "max": 100.21612548828125, "pos_frac": 0.90625, "sample": [39.40443420410156, 31.43414306640625, 10.0330810546875, 22.98474884033203, 28.07733154296875, -14.400674819946289, 21.33448028564453, 9.53271484375, 52.54639434814453, 71.23894500732422, -2.2370452880859375, 33.03295135498047, 9.63015365600586, 3.616260528564453, 14.440277099609375, 20.443138122558594, 42.592041015625, 21.709362030029297, 13.80401611328125, 44.069671630859375, -13.755475997924805, 67.33219146728516, -9.342254638671875, 93.97154998779297, 19.069427490234375, 44.99053955078125, 49.05999755859375, 39.90703582763672, 19.138572692871094, 25.108482360839844, 35.57524871826172, 48.64945983886719, 64.06245422363281, 28.911699295043945, 35.52897644042969, 71.13887786865234, 10.818252563476562, 22.44631576538086, 43.715538024902344, 99.28727722167969, -1.250082015991211, 50.74273681640625, 73.63717651367188, 5.531776428222656, 50.58042907714844, 24.457874298095703, 76.94813537597656, 40.59735870361328, 69.43934631347656, 17.441314697265625, 18.344602584838867, -24.143829345703125, 5.3631134033203125, 45.40254592895508, 4.841575622558594, 100.21612548828125, 12.920196533203125, 29.198902130126953, 33.34544372558594, 20.92754364013672, 90.80769348144531, 22.86761474609375, 16.30010986328125, 56.01031494140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000420.npy"}
|
|
{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 27.947509765625, "std": 27.135541915893555, "min": -27.266998291015625, "p10": -2.39073028564453, "median": 26.37243938446045, "p90": 63.69434509277344, "max": 91.4513931274414, "pos_frac": 0.875, "sample": [53.27422332763672, 6.286285400390625, 31.142545700073242, 0.7112350463867188, 41.37700653076172, 29.71971893310547, 73.43354797363281, 10.085319519042969, 0.2431964874267578, 11.499553680419922, 40.62467956542969, 20.545291900634766, 56.23870849609375, 70.55010223388672, 43.049041748046875, 30.706146240234375, 61.81712341308594, -8.342735290527344, 27.017457962036133, 45.27935791015625, 91.36543273925781, 62.9639892578125, 42.34767150878906, 3.3840904235839844, 16.231369018554688, -1.1268081665039062, 37.5498046875, 19.396987915039062, 21.44355010986328, 29.473846435546875, 29.497459411621094, 38.621543884277344, 86.91244506835938, 49.67622375488281, 9.061763763427734, 3.273853302001953, 35.79250717163086, 17.348976135253906, -27.266998291015625, 91.4513931274414, 1.150146484375, -6.5229034423828125, 64.00735473632812, 85.74932861328125, 23.891799926757812, 39.019195556640625, 16.18622589111328, 16.9337215423584, 6.228288650512695, 18.10564422607422, 25.727420806884766, 20.58045196533203, 44.22382354736328, -15.02252197265625, -18.04204559326172, 34.84965133666992, 42.814064025878906, 3.8065223693847656, -10.770965576171875, 46.27020263671875, 2.091012954711914, -2.9324111938476562, 15.419792175292969, 32.21990966796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000421.npy"}
|
|
{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 30.136600494384766, "std": 28.6448917388916, "min": -15.378921508789062, "p10": -5.154238891601561, "median": 26.464195251464844, "p90": 67.8453437805176, "max": 114.09463500976562, "pos_frac": 0.859375, "sample": [55.75636291503906, 63.789886474609375, 80.07323455810547, 38.915443420410156, 40.63671875, -5.8149566650390625, 2.7022857666015625, 76.18350219726562, 31.359716415405273, 114.09463500976562, -2.2789154052734375, 50.65376281738281, -3.6125640869140625, 11.365402221679688, 17.891773223876953, 11.645477294921875, 28.0286922454834, -14.784767150878906, 15.652446746826172, 58.67463684082031, 27.340660095214844, 108.43182373046875, 17.09386444091797, -6.067869186401367, 56.959781646728516, 53.75121307373047, 16.912948608398438, 14.244865417480469, 12.439641952514648, 36.64433288574219, 75.90406799316406, 70.96516418457031, 16.97900390625, 13.763481140136719, 6.838817596435547, 27.8364200592041, 69.5833969116211, -10.491279602050781, 30.393081665039062, -11.684135437011719, 0.8183937072753906, 30.80963897705078, 25.587730407714844, 0.05438232421875, 8.345806121826172, 55.53066635131836, 39.937255859375, 53.93933868408203, 20.071311950683594, 15.011331558227539, 48.24403762817383, 20.46435546875, 16.962310791015625, 23.06119155883789, 49.73597717285156, 49.06658935546875, 19.694091796875, -15.378921508789062, 49.874481201171875, 47.78181457519531, -9.754955291748047, 1.1322174072265625, 48.60662078857422, 30.37470245361328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000422.npy"}
|
|
{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 31.304004669189453, "std": 29.750967025756836, "min": -28.430749893188477, "p10": -2.4560897827148436, "median": 31.01981544494629, "p90": 74.03501815795902, "max": 104.8265151977539, "pos_frac": 0.875, "sample": [1.740325927734375, 43.887977600097656, -6.989715576171875, 35.175262451171875, -11.209182739257812, -9.306709289550781, 64.70616912841797, -5.042655944824219, 48.18180847167969, 44.70512390136719, 81.20259857177734, 10.69666862487793, 27.924758911132812, 36.521331787109375, 55.231502532958984, -6.071319580078125, 102.2619857788086, 104.8265151977539, 60.7161865234375, 46.36265182495117, 10.499099731445312, 4.3674468994140625, 47.705535888671875, 41.4913330078125, 78.03309631347656, 13.191925048828125, 20.830232620239258, 3.8342819213867188, -28.430749893188477, 2.5222625732421875, 26.124055862426758, 33.09950637817383, 15.222740173339844, 50.37500762939453, 101.71279907226562, 55.99540710449219, 9.061929702758789, 50.735679626464844, -2.474893569946289, 31.741836547851562, 31.584075927734375, 50.54054260253906, 32.870452880859375, 16.649662017822266, 99.930908203125, 30.90761947631836, 11.944061279296875, 8.461463928222656, 47.41911315917969, 10.900039672851562, 11.34725570678711, 13.438892364501953, 11.960845947265625, 47.41365051269531, 44.80152130126953, 31.13201141357422, -2.4122142791748047, 20.295557022094727, 21.442813873291016, 1.515899658203125, 47.259056091308594, 8.529853820800781, 34.530426025390625, 79.83309173583984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000423.npy"}
|
|
{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 34.16087341308594, "std": 30.783992767333984, "min": -15.3675537109375, "p10": -0.4809219360351554, "median": 30.520843505859375, "p90": 83.20059051513675, "max": 119.94644165039062, "pos_frac": 0.890625, "sample": [-9.975234985351562, 86.44996643066406, 34.36487579345703, 11.39616584777832, -0.9906387329101562, 32.41616439819336, 46.614532470703125, 57.85649871826172, 21.63819122314453, -6.73529052734375, 50.77696228027344, 24.28466796875, 40.07073974609375, 5.478700637817383, 34.80463790893555, 108.66712951660156, 23.55077362060547, 50.385345458984375, 27.665315628051758, 41.412208557128906, 48.54346466064453, 16.7768497467041, 98.77799987792969, 28.173255920410156, 45.94399642944336, -4.006927490234375, 0.408935546875, 61.193817138671875, -4.520542144775391, 33.20451736450195, 9.087392807006836, 102.75807189941406, 14.02560043334961, 20.216201782226562, 30.777023315429688, 52.18864440917969, 75.61871337890625, -15.3675537109375, 15.41912841796875, 25.671810150146484, 15.716047286987305, 26.075546264648438, -0.8622894287109375, 12.986930847167969, 32.828582763671875, 25.486976623535156, 34.4329833984375, 0.4185600280761719, 4.3088226318359375, 119.94644165039062, 24.96880340576172, 32.76499938964844, 30.264663696289062, 35.259483337402344, 92.97138977050781, 11.200458526611328, 38.94160461425781, 5.4762420654296875, 18.829374313354492, 115.4595947265625, 46.357627868652344, 51.80231857299805, 42.84865951538086, 32.79005432128906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000424.npy"}
|
|
{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 30.633705139160156, "std": 25.08201789855957, "min": -31.212093353271484, "p10": -0.8593372344970671, "median": 31.576122283935547, "p90": 61.771606063842775, "max": 103.90589141845703, "pos_frac": 0.890625, "sample": [-2.193584442138672, 2.5322799682617188, 50.553794860839844, -31.212093353271484, -11.697219848632812, 52.913421630859375, 20.492088317871094, 46.01338195800781, 31.149566650390625, 49.507659912109375, 12.780891418457031, 53.98347473144531, 5.053436279296875, 5.23358154296875, 28.963565826416016, 47.14543151855469, -2.28668212890625, 12.37701416015625, 33.637298583984375, 2.25390625, 39.917510986328125, 39.74348449707031, 8.77877426147461, 65.48652648925781, 64.06346130371094, -7.559608459472656, 30.778362274169922, 20.34807586669922, 61.99441909790039, 66.05096435546875, 27.039772033691406, 44.47015380859375, 42.586753845214844, 9.134162902832031, 48.90362548828125, 20.316619873046875, 35.71492004394531, 21.769378662109375, 15.345794677734375, 61.251708984375, 7.191577911376953, 57.66923522949219, 36.645172119140625, -4.5494384765625, 44.546104431152344, 58.448699951171875, 12.676807403564453, 44.25170135498047, 15.184562683105469, 103.90589141845703, 32.00267791748047, 50.30718231201172, 2.5753097534179688, 8.82513427734375, 14.522346496582031, 44.39326477050781, 44.03451156616211, 73.03329467773438, 37.51032257080078, 65.10896301269531, 18.92487335205078, -8.464378356933594, 54.201499938964844, 24.27582550048828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000425.npy"}
|
|
{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 33.87653350830078, "std": 26.95366096496582, "min": -29.2664794921875, "p10": 3.732493019104005, "median": 31.242584228515625, "p90": 70.53075485229493, "max": 98.32042694091797, "pos_frac": 0.921875, "sample": [20.487497329711914, 12.655784606933594, 12.451126098632812, 31.65283966064453, 19.02685546875, 27.263214111328125, 19.306121826171875, 46.22502899169922, 64.6983413696289, 31.348388671875, 31.13677978515625, -0.34667205810546875, 34.56856918334961, 3.391469955444336, 54.37407684326172, 20.082164764404297, 22.866182327270508, 46.63593292236328, 26.521570205688477, 17.57146453857422, -3.7211990356445312, 62.10078430175781, 4.5282135009765625, 30.969451904296875, 22.843978881835938, 26.471223831176758, 48.44784927368164, -10.746471405029297, 90.95045471191406, 85.03638458251953, 23.275943756103516, 98.32042694091797, 28.025108337402344, 18.63653564453125, 41.86894226074219, 89.17823028564453, 10.669326782226562, -25.995223999023438, 34.96440887451172, 88.6927719116211, 12.975845336914062, 37.49473571777344, 84.92774963378906, 53.611167907714844, 56.01831817626953, 19.17953109741211, 57.667625427246094, 72.45451354980469, 0.27800750732421875, 25.085102081298828, 37.60785675048828, 53.989990234375, 19.75049591064453, 34.44711685180664, 39.069583892822266, 42.164573669433594, 11.473989486694336, 66.04198455810547, 33.51884460449219, -29.2664794921875, 34.75926971435547, 50.95924377441406, 13.064468383789062, 34.39088439941406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000426.npy"}
|
|
{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 24.72673988342285, "std": 23.6002254486084, "min": -45.89013671875, "p10": 1.2410911560058606, "median": 18.56185531616211, "p90": 55.65249252319336, "max": 87.18797302246094, "pos_frac": 0.921875, "sample": [6.184356689453125, 11.921085357666016, 54.65080261230469, 77.97955322265625, 59.4998779296875, 51.506996154785156, 29.52445411682129, -3.9144744873046875, 51.08489990234375, 51.690391540527344, 9.496002197265625, 7.7047271728515625, -8.995254516601562, 12.734626770019531, 31.76300048828125, 27.29827880859375, 72.1010971069336, 62.07603454589844, 32.14801025390625, -45.89013671875, 14.473037719726562, 12.086090087890625, 35.086822509765625, 35.34297180175781, 13.098701477050781, 21.7469482421875, 16.29608154296875, 87.18797302246094, 18.437332153320312, 6.353153228759766, 31.258834838867188, 55.80292510986328, 40.18914031982422, 10.129941940307617, 9.14541244506836, 42.634647369384766, 6.949943542480469, 5.886684417724609, 19.453323364257812, -1.1329269409179688, 48.182151794433594, 22.37373161315918, 5.77630615234375, 46.86784744262695, 2.3616561889648438, 10.41201400756836, 30.222293853759766, 59.38602066040039, 0.697113037109375, 3.4302730560302734, 40.21788024902344, 15.052474975585938, 15.056900024414062, 18.686378479003906, 26.00601577758789, 15.932174682617188, 0.7608489990234375, 43.58116912841797, 55.301483154296875, -1.7167816162109375, 7.357112884521484, 12.520875930786133, 27.50830841064453, 5.545692443847656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000427.npy"}
|
|
{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 35.59562683105469, "std": 27.30299186706543, "min": -15.609573364257812, "p10": 4.78185272216797, "median": 31.157679557800293, "p90": 69.23292694091796, "max": 124.00845336914062, "pos_frac": 0.96875, "sample": [5.82342529296875, 7.812587738037109, 31.353124618530273, 38.4456787109375, 124.00845336914062, 28.174636840820312, 8.452709197998047, 51.752197265625, 44.177650451660156, 47.568572998046875, 26.19903564453125, 8.748245239257812, 88.82743835449219, 13.153305053710938, 105.97998809814453, -7.21942138671875, -15.609573364257812, 69.23786163330078, 16.815597534179688, 45.112701416015625, 51.95219421386719, 30.838470458984375, 17.73939323425293, 37.44672775268555, 37.676177978515625, 91.71904754638672, 29.600173950195312, 19.633010864257812, 15.188617706298828, 50.50038528442383, 43.128456115722656, 15.714553833007812, 31.353729248046875, 17.14545249938965, 74.93199920654297, 2.1648731231689453, 29.91388702392578, 52.616241455078125, 7.166065216064453, 3.770191192626953, 42.89833068847656, 30.962234497070312, 26.668067932128906, 29.27007293701172, 10.337379455566406, 51.52227020263672, 2.287506103515625, 30.27283477783203, 57.412628173828125, 37.58557891845703, 12.622547149658203, 47.15192413330078, 69.2214126586914, 4.3354644775390625, 64.22328186035156, 42.697532653808594, 51.426055908203125, 80.5297622680664, 46.10661315917969, 18.827014923095703, 66.14675903320312, 1.3312911987304688, 16.35430908203125, 38.915428161621094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000428.npy"}
|
|
{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 29.67691421508789, "std": 29.05752182006836, "min": -25.14111328125, "p10": -1.7938974380493165, "median": 23.155555725097656, "p90": 68.45147705078125, "max": 120.18927001953125, "pos_frac": 0.859375, "sample": [21.033653259277344, 32.648681640625, 11.255931854248047, 68.51409912109375, -5.105796813964844, 38.503211975097656, 4.454353332519531, 20.64923667907715, 46.9320068359375, 21.686309814453125, -7.422950744628906, 42.71531677246094, -1.7742080688476562, 11.774358749389648, 3.9075775146484375, 60.98003005981445, 51.529571533203125, 3.8551101684570312, 120.18927001953125, 18.629241943359375, 77.25743103027344, 93.90151977539062, 1.073699951171875, 51.408233642578125, 17.715713500976562, 4.701763153076172, 71.42711639404297, 21.328044891357422, 62.791595458984375, 34.096412658691406, -8.983531951904297, 24.673751831054688, 82.0224838256836, -25.14111328125, 8.531181335449219, -1.7086639404296875, -10.709892272949219, 24.624801635742188, 31.292360305786133, 13.462739944458008, 19.61901092529297, 40.87310791015625, 15.542579650878906, 18.324081420898438, 79.48159790039062, 12.205413818359375, 61.19603729248047, 7.841850280761719, 11.643718719482422, -19.660308837890625, 29.172393798828125, 47.90516662597656, 54.463539123535156, 68.30535888671875, 9.489355087280273, 5.1867218017578125, -1.8023357391357422, 32.82576370239258, 55.328956604003906, 33.17871856689453, 50.414188385009766, 34.404815673828125, 50.724082946777344, 43.93797302246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000429.npy"}
|
|
{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 30.790908813476562, "std": 26.75757598876953, "min": -11.660322189331055, "p10": -3.097039031982418, "median": 25.939796447753906, "p90": 66.58873291015625, "max": 106.94229125976562, "pos_frac": 0.890625, "sample": [50.4027099609375, 10.039810180664062, 43.908966064453125, -6.723560333251953, 73.505615234375, 18.81192398071289, 24.3577880859375, 21.525775909423828, 18.383649826049805, 32.507118225097656, 2.2525291442871094, 18.138397216796875, 5.7283172607421875, 25.025848388671875, 20.696380615234375, 51.087249755859375, -10.22750473022461, 40.128997802734375, 24.555862426757812, 6.878765106201172, 14.362213134765625, -4.744239807128906, 32.06996536254883, 40.958805084228516, 10.546846389770508, 10.316314697265625, 34.913421630859375, 54.56292724609375, 0.746429443359375, 43.787757873535156, 21.507638931274414, -10.768903732299805, 70.16326904296875, 33.262306213378906, 19.41364288330078, 44.59248352050781, 7.511314392089844, -11.357147216796875, 9.164228439331055, 37.74818420410156, 58.72657775878906, 20.571632385253906, 51.440513610839844, 24.906455993652344, 26.853744506835938, 89.54547119140625, 44.23350524902344, 34.59205627441406, 67.14874267578125, 10.691864013671875, 31.003231048583984, 19.843143463134766, 65.28204345703125, 34.48004150390625, -11.660322189331055, 50.861724853515625, 83.19566345214844, -8.0867919921875, 57.014373779296875, 106.94229125976562, 99.24786376953125, 37.63526916503906, 31.72270965576172, 14.684200286865234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000430.npy"}
|
|
{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 34.62909698486328, "std": 26.41916847229004, "min": -6.287508010864258, "p10": 2.4207304000854495, "median": 30.567895889282227, "p90": 76.36001129150391, "max": 109.81414794921875, "pos_frac": 0.953125, "sample": [2.0958251953125, 30.039905548095703, 4.787708282470703, 50.316192626953125, 39.71757507324219, 13.594406127929688, 77.47158813476562, 20.72716522216797, 11.510429382324219, 32.934593200683594, 4.5437164306640625, 39.162940979003906, 2.698484420776367, 1.4803352355957031, 2.3016929626464844, 8.065078735351562, 2.2085037231445312, 3.6968231201171875, 12.433418273925781, -0.9259452819824219, 39.53178405761719, 39.18159484863281, 50.109283447265625, 74.41464233398438, 11.055660247802734, 31.09588623046875, 77.19374084472656, 38.278377532958984, 78.0201416015625, 60.23112487792969, 73.08709716796875, 23.856346130371094, 58.902435302734375, 54.3603630065918, 43.220733642578125, -2.4367523193359375, 109.81414794921875, 8.474632263183594, 56.946205139160156, 31.226898193359375, 79.97989654541016, 51.5579833984375, 49.2711181640625, -6.287508010864258, 23.869873046875, 23.81633758544922, 80.34170532226562, 18.13408851623535, 20.023256301879883, 53.930137634277344, 61.294464111328125, 27.025039672851562, 21.58679962158203, 40.603187561035156, 26.57027244567871, 55.317298889160156, 21.592676162719727, 17.729217529296875, 57.004913330078125, 21.638322830200195, 82.26618957519531, 19.180130004882812, 10.306499481201172, 44.08558654785156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000431.npy"}
|
|
{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 32.3117790222168, "std": 27.88199234008789, "min": -25.693618774414062, "p10": -0.7496078491210927, "median": 29.696426391601562, "p90": 65.77025909423828, "max": 124.13148498535156, "pos_frac": 0.890625, "sample": [34.470306396484375, 13.55820083618164, 28.74871826171875, 15.525211334228516, 35.254180908203125, -1.2584152221679688, 16.566818237304688, 22.994953155517578, 24.31955337524414, 3.0241012573242188, -3.859264373779297, -1.2264671325683594, 49.665740966796875, 60.37053680419922, 37.797306060791016, 124.13148498535156, 30.251388549804688, 43.808990478515625, 30.452232360839844, -8.784904479980469, 4.031654357910156, 26.979415893554688, 66.16645812988281, 24.750900268554688, 36.366607666015625, 77.93799591064453, 29.141464233398438, 0.3630638122558594, 31.15704345703125, 21.00554656982422, 27.552505493164062, 7.322090148925781, 35.262203216552734, 31.575366973876953, 4.74664306640625, 105.49640655517578, 40.02297592163086, -25.693618774414062, 38.63201904296875, 24.66912841796875, 7.948814392089844, -2.1251373291015625, 64.84579467773438, 20.423324584960938, 62.09638214111328, 49.23206329345703, 31.584701538085938, 82.427490234375, 47.89811706542969, -4.04913330078125, 18.886917114257812, 59.38505554199219, 23.44601821899414, 11.171974182128906, 38.603965759277344, 24.169116973876953, 100.19037628173828, 11.330028533935547, 50.630706787109375, 50.66046142578125, 12.702995300292969, 42.15138244628906, 32.13207244873047, 68.91383361816406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000432.npy"}
|
|
{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 28.365665435791016, "std": 26.73403549194336, "min": -26.01568603515625, "p10": -3.5823831558227535, "median": 25.44792652130127, "p90": 66.77043685913087, "max": 93.929443359375, "pos_frac": 0.84375, "sample": [9.499252319335938, 46.83619689941406, 76.94300842285156, 32.33671569824219, 44.312442779541016, -2.7405052185058594, 18.331193923950195, -10.618701934814453, 66.00806427001953, 49.02989196777344, 3.66290283203125, -3.7364120483398438, 93.929443359375, 28.767868041992188, 22.850631713867188, 67.50007629394531, 40.61976623535156, 66.30494689941406, 54.311004638671875, 16.7535400390625, -3.222982406616211, 39.62146759033203, 41.368919372558594, 29.588157653808594, 21.164106369018555, 25.570587158203125, 12.891960144042969, 40.324928283691406, 27.932418823242188, 36.73704147338867, 3.03961181640625, 35.750396728515625, -7.607574462890625, 17.122314453125, 5.0133514404296875, 49.02735900878906, 66.96993255615234, -6.1447296142578125, 16.076080322265625, 6.761486053466797, 92.84504699707031, 55.36456298828125, 24.184593200683594, -10.288955688476562, -6.9521484375, 25.325265884399414, 25.266944885253906, 17.64801788330078, 11.519519805908203, -26.01568603515625, 71.58280944824219, 1.7373809814453125, 31.045372009277344, 2.048614501953125, 10.052156448364258, 9.280052185058594, 43.725982666015625, 31.71540069580078, -1.3043270111083984, 82.53628540039062, 43.927772521972656, 34.839263916015625, 49.00421142578125, 17.428237915039062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000433.npy"}
|
|
{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 29.513980865478516, "std": 25.28011131286621, "min": -11.158075332641602, "p10": -0.0903049468994126, "median": 24.724658966064453, "p90": 57.471968078613294, "max": 113.81451416015625, "pos_frac": 0.890625, "sample": [58.908851623535156, 48.097633361816406, 113.81451416015625, 1.5989341735839844, -7.2209930419921875, 4.5305938720703125, 33.25751495361328, 20.99590301513672, -11.158075332641602, -0.6878433227539062, 38.03692626953125, 54.119239807128906, 21.32206153869629, 86.17977905273438, 17.99686050415039, 1.3039512634277344, 23.07006072998047, 14.00164794921875, 31.89281463623047, 87.32478332519531, 10.561233520507812, 24.534671783447266, 43.711456298828125, 8.198829650878906, 21.846572875976562, 37.7049560546875, 20.729623794555664, 20.858625411987305, 27.095048904418945, 12.906028747558594, 22.678451538085938, 32.549903869628906, 35.205650329589844, -6.731048583984375, 34.08345031738281, 37.7464485168457, -2.150543212890625, 8.443483352661133, 42.76666259765625, 50.2147102355957, 6.189079284667969, 71.67816925048828, 47.68682861328125, -5.463951110839844, 32.784568786621094, 31.991031646728516, 34.598175048828125, 24.91464614868164, 27.215190887451172, 64.54306030273438, 48.68803405761719, 10.850461959838867, 42.24580764770508, 18.78122329711914, 19.513160705566406, 17.711875915527344, 44.18301773071289, 41.804779052734375, 33.70885467529297, 23.538978576660156, 102.3184585571289, 12.036386489868164, 18.83014678955078, -1.792520523071289], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000434.npy"}
|
|
{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 32.3472900390625, "std": 29.603843688964844, "min": -36.06764221191406, "p10": 1.6940469741821302, "median": 27.134838104248047, "p90": 77.46041717529297, "max": 114.38360595703125, "pos_frac": 0.9375, "sample": [78.39309692382812, -4.922710418701172, 13.5562744140625, 43.68182373046875, 62.89335632324219, 35.50236129760742, 6.357126235961914, 23.235565185546875, 73.82659912109375, 27.17656707763672, 69.47360229492188, 35.903900146484375, 4.7804107666015625, 56.39757537841797, 24.27703857421875, 80.4052734375, 35.995033264160156, 14.330520629882812, 30.163421630859375, 1.1572189331054688, -4.286964416503906, 14.00592041015625, 0.18109130859375, 25.12176513671875, 0.7306327819824219, 30.703266143798828, 7.970676422119141, 27.093109130859375, -10.5802001953125, 9.184904098510742, 41.98968505859375, 2.946645736694336, 37.93895721435547, 29.677490234375, 49.489341735839844, 46.359683990478516, 58.434059143066406, -36.06764221191406, 4.7152099609375, 8.657344818115234, 15.540283203125, 7.487266540527344, 79.74098205566406, 8.667518615722656, 43.424903869628906, 83.30552673339844, 75.28416442871094, 56.06111145019531, 14.833946228027344, 4.022409439086914, 114.38360595703125, 18.596860885620117, 43.64385223388672, 17.927688598632812, 80.58209991455078, 96.71363830566406, 9.792259216308594, 16.20379638671875, 12.00650405883789, 58.46330261230469, 29.10842514038086, 21.21533966064453, 33.75677490234375, 72.61519622802734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000435.npy"}
|
|
{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 31.527891159057617, "std": 24.287145614624023, "min": -6.603973388671875, "p10": 1.821120834350588, "median": 32.005001068115234, "p90": 62.72529678344727, "max": 122.19950866699219, "pos_frac": 0.921875, "sample": [-0.4515380859375, -6.603973388671875, -5.0181121826171875, 32.56963348388672, 60.818641662597656, 44.77760314941406, 13.8759765625, 49.52922058105469, 21.730712890625, 7.0438690185546875, 37.27751159667969, 75.52275085449219, 16.699569702148438, 11.448633193969727, 49.097564697265625, 37.18323516845703, 0.9489631652832031, 32.9471435546875, 50.88506317138672, -4.935699462890625, 23.214187622070312, 35.55812072753906, 44.16114807128906, 42.413536071777344, 74.85223388671875, 33.730369567871094, 50.2357177734375, 35.854827880859375, 16.27446746826172, 38.720123291015625, 91.10537719726562, 11.6226806640625, 122.19950866699219, 21.760269165039062, 63.54243469238281, 12.868877410888672, 14.943367004394531, 35.757102966308594, 39.046688079833984, 27.137237548828125, 68.86053466796875, 11.28714370727539, 25.269439697265625, 12.368247985839844, 42.835060119628906, 32.882484436035156, 12.07748794555664, 0.32569122314453125, 34.639583587646484, 15.443733215332031, 25.89087677001953, 30.304054260253906, 12.321487426757812, 41.368927001953125, 65.08492279052734, 12.694747924804688, 49.76757049560547, 15.70241928100586, 3.8561553955078125, 31.44036865234375, -3.958375930786133, 46.15394973754883, 47.767822265625, 23.05567169189453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000436.npy"}
|
|
{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 28.168094635009766, "std": 23.734500885009766, "min": -14.482528686523438, "p10": -3.9695531845092753, "median": 25.354019165039062, "p90": 61.31471595764161, "max": 75.3101806640625, "pos_frac": 0.875, "sample": [14.98736572265625, -2.0578765869140625, 15.414596557617188, 27.393217086791992, 16.520965576171875, -14.092733383178711, 43.466888427734375, 34.355186462402344, 25.2861328125, 0.0396881103515625, 7.371097564697266, 23.358478546142578, -5.55535888671875, 69.57962799072266, 15.078804016113281, 32.41966247558594, 66.69471740722656, -12.016210556030273, -8.270149230957031, 55.79638671875, 14.346923828125, 52.688018798828125, 23.29298973083496, 38.46788787841797, 11.293342590332031, 75.3101806640625, 13.851371765136719, 9.071640014648438, 66.41587829589844, 17.797378540039062, 61.93086242675781, 50.69013977050781, 63.06788635253906, 1.6590118408203125, 53.19316101074219, -14.482528686523438, 29.846200942993164, 9.302911758422852, 59.87704086303711, 25.421905517578125, 37.310325622558594, 22.186691284179688, 30.5932559967041, 54.57522964477539, 59.429039001464844, 8.908451080322266, 57.40985107421875, 12.599067687988281, 39.581756591796875, 44.28858947753906, -4.788843154907227, 2.244171142578125, 42.92498779296875, -10.46490478515625, 16.706459045410156, 67.19384765625, 35.80621337890625, 40.74253845214844, 16.630775451660156, 46.75624084472656, 48.679500579833984, 16.046485900878906, 31.118228912353516, 17.4674072265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000437.npy"}
|
|
{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 34.36668395996094, "std": 27.218605041503906, "min": -15.934722900390625, "p10": 5.981065368652344, "median": 25.980530738830566, "p90": 71.68935546875001, "max": 103.27169799804688, "pos_frac": 0.9375, "sample": [10.868288040161133, 24.667009353637695, 51.63307189941406, 15.72344970703125, 12.778104782104492, 18.184524536132812, 91.72218322753906, -3.8825931549072266, 22.944053649902344, 14.903854370117188, 23.73009490966797, 19.383323669433594, 32.5223388671875, 16.533004760742188, 75.20478820800781, 31.69970703125, 65.33010864257812, 62.636558532714844, 21.73788833618164, 6.500083923339844, 17.114295959472656, 67.60116577148438, 58.44978332519531, 84.92115020751953, 36.44465637207031, 30.38806915283203, 35.73743438720703, 56.4913330078125, -15.934722900390625, 5.758628845214844, 10.471258163452148, 6.995353698730469, 56.48184585571289, 49.65130615234375, 77.93753051757812, 17.649253845214844, 17.206499099731445, 16.09076690673828, 53.38859558105469, 60.35980224609375, 103.27169799804688, 72.834716796875, 8.091962814331055, 68.11286926269531, -8.261810302734375, 13.798171997070312, 32.80424118041992, 34.11225891113281, 27.294052124023438, 58.83025360107422, 18.63492202758789, 69.016845703125, 62.955039978027344, 23.14519500732422, 0.12787818908691406, 5.621824264526367, 15.869636535644531, 11.885643005371094, 35.152061462402344, 59.79057693481445, 24.1658935546875, 81.96826934814453, -5.2573699951171875, 27.479087829589844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000438.npy"}
|
|
{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 26.97458839416504, "std": 24.870941162109375, "min": -20.33409881591797, "p10": 1.1402645111083987, "median": 24.122360229492188, "p90": 62.87066078186036, "max": 98.43869018554688, "pos_frac": 0.90625, "sample": [41.52153778076172, 93.10154724121094, 8.747631072998047, 45.0307731628418, -3.0170650482177734, 7.4014739990234375, 27.167823791503906, 57.455047607421875, 34.21990203857422, 32.86578369140625, 13.277645111083984, 41.10557556152344, 15.678466796875, 1.4432754516601562, 12.978195190429688, 31.226835250854492, 27.486528396606445, 3.36309814453125, 20.574356079101562, 9.40667724609375, 7.825239181518555, 64.50342559814453, 34.31245422363281, 4.607452392578125, 39.23827362060547, -6.8011474609375, 28.18114471435547, 32.14352798461914, -1.198953628540039, 18.03004264831543, 66.97236633300781, 6.922187805175781, 37.698883056640625, 31.339061737060547, 98.43869018554688, 43.20857238769531, 61.84715270996094, 69.0189208984375, 31.439064025878906, 29.17359161376953, 4.6746978759765625, -18.138206481933594, 15.216934204101562, 7.812187194824219, 61.15814971923828, 8.965778350830078, 31.43310546875, 22.81470489501953, 20.894241333007812, 25.430015563964844, 75.62263488769531, 17.687007904052734, 8.65423583984375, 49.72113037109375, 7.94450569152832, 28.452951431274414, 63.30930709838867, 22.123695373535156, -4.067478179931641, 4.9058380126953125, 53.193092346191406, 19.953807830810547, -20.33409881591797, 1.0104026794433594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000439.npy"}
|
|
{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 25.880369186401367, "std": 27.163881301879883, "min": -22.038894653320312, "p10": -6.573911285400387, "median": 19.851463317871094, "p90": 65.4570785522461, "max": 114.24848937988281, "pos_frac": 0.859375, "sample": [38.6461067199707, 11.1190185546875, 50.32643127441406, 45.16130065917969, 53.6573486328125, 20.079452514648438, 26.770225524902344, 32.51551055908203, 19.194385528564453, 71.62940216064453, 18.018522262573242, 18.172809600830078, 69.30165100097656, 5.384208679199219, 35.603912353515625, 4.59014892578125, 30.166034698486328, -18.765609741210938, 7.766563415527344, 14.698997497558594, 19.125518798828125, 44.39972686767578, 29.615936279296875, 19.330215454101562, 28.62622833251953, 22.450668334960938, -17.187204360961914, -1.9698028564453125, 8.408935546875, 5.118276596069336, 37.034423828125, 79.80740356445312, -13.463409423828125, 33.37590026855469, 16.213586807250977, 14.964981079101562, 63.4764404296875, -22.038894653320312, 22.919204711914062, 3.126462936401367, 53.27679443359375, 20.90959930419922, 81.27824401855469, 59.476287841796875, 19.62347412109375, 16.870643615722656, 25.77025604248047, 6.397010803222656, 44.04447937011719, 66.30592346191406, 21.51233673095703, 114.24848937988281, 12.995002746582031, 17.148157119750977, 8.479337692260742, 71.16573333740234, 10.045093536376953, -13.141487121582031, -8.270912170410156, -2.6142425537109375, 39.63904571533203, 53.514923095703125, -10.9967041015625, 1.2951507568359375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000440.npy"}
|
|
{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 30.808921813964844, "std": 24.265342712402344, "min": -39.81626892089844, "p10": 3.58338966369629, "median": 28.79450035095215, "p90": 58.299786758422854, "max": 102.75078582763672, "pos_frac": 0.9375, "sample": [39.48384094238281, 2.800809860229492, 20.08026123046875, 58.893653869628906, 40.38323211669922, 49.47669219970703, 24.71617889404297, 24.349349975585938, 44.391578674316406, 8.050308227539062, 11.311946868896484, 30.72649574279785, 36.9643440246582, 56.91409683227539, 17.617435455322266, 50.540679931640625, 11.609132766723633, 21.879898071289062, 13.474960327148438, 49.6287841796875, 1.2844581604003906, 3.3216552734375, 28.670013427734375, 74.93307495117188, 50.98735046386719, 20.85778045654297, 8.322189331054688, 53.941261291503906, 20.035125732421875, 102.75078582763672, 7.619283676147461, -8.227752685546875, 37.97216796875, 33.335166931152344, 29.995361328125, 30.98713493347168, 4.194103240966797, 49.80591583251953, 67.19628143310547, 17.7442626953125, 31.584793090820312, 27.817337036132812, -2.9727020263671875, -39.81626892089844, 51.24473571777344, 28.525100708007812, 34.82912826538086, 83.41060638427734, 38.28179931640625, 20.035621643066406, 12.269241333007812, 15.277130126953125, 50.85755920410156, 27.273849487304688, 14.608711242675781, 32.581451416015625, 68.86366271972656, 24.783233642578125, 28.227088928222656, 86.18128967285156, 28.918987274169922, -7.490680694580078, 35.3313102722168, 32.12886047363281], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000441.npy"}
|
|
{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 37.83577346801758, "std": 26.97312355041504, "min": -12.356636047363281, "p10": 9.353521347045898, "median": 33.585201263427734, "p90": 71.43325958251954, "max": 102.5697021484375, "pos_frac": 0.96875, "sample": [31.666297912597656, 19.956409454345703, 59.29777145385742, 1.1578521728515625, 45.597801208496094, 34.541351318359375, 11.035247802734375, 38.56797790527344, 22.547256469726562, 34.13430404663086, 14.167095184326172, 72.10831451416016, 44.39386749267578, 52.7110595703125, 42.477142333984375, 85.3658218383789, 69.8581314086914, 13.93209457397461, 18.305923461914062, 35.17184066772461, 9.646438598632812, 60.52250671386719, 16.677438735961914, 64.62422180175781, 16.242918014526367, 51.748600006103516, 90.66539001464844, 16.27637481689453, 60.77032470703125, 55.742835998535156, 33.866416931152344, 14.57762336730957, 11.31863784790039, 1.7699165344238281, 25.95075225830078, 60.7698974609375, -5.877708435058594, 100.75370025634766, 45.08964157104492, 61.61546325683594, 19.548263549804688, 32.22132873535156, 34.801422119140625, 9.227985382080078, 20.336639404296875, 82.2606430053711, 66.96455383300781, 28.81085205078125, 96.5226821899414, 35.68824005126953, 30.806922912597656, 5.4709014892578125, -12.356636047363281, 33.303985595703125, 16.190635681152344, 1.6237335205078125, 51.86650848388672, 18.42931365966797, 102.5697021484375, 24.40240478515625, 65.13198852539062, 19.83330535888672, 31.56098175048828, 66.52816772460938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000442.npy"}
|
|
{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 28.547142028808594, "std": 27.842365264892578, "min": -28.24059295654297, "p10": -6.608889007568359, "median": 27.326040267944336, "p90": 63.84203567504884, "max": 101.35150909423828, "pos_frac": 0.828125, "sample": [101.35150909423828, 8.97713851928711, 64.98435974121094, 4.3276214599609375, 26.153640747070312, -5.7119598388671875, 34.59135437011719, -0.3467864990234375, 42.088932037353516, 27.805578231811523, 2.002044677734375, 33.968963623046875, 26.84650230407715, 17.507369995117188, 18.481239318847656, -17.89244842529297, 36.587860107421875, -8.396026611328125, 74.3504409790039, 30.212556838989258, 74.33384704589844, 76.36013793945312, 54.135215759277344, 39.5925178527832, -28.24059295654297, 21.15850830078125, 49.888214111328125, 23.383827209472656, 45.73343276977539, 40.19651794433594, 38.69329833984375, 46.48197937011719, -0.3098602294921875, 8.087310791015625, 15.130390167236328, 14.616386413574219, 32.76776885986328, 56.593658447265625, -16.16509246826172, -6.8787384033203125, -5.979240417480469, -8.927192687988281, 49.1151123046875, 30.411510467529297, 2.012836456298828, 2.1031646728515625, 48.96952819824219, 16.511428833007812, 35.5458984375, 61.176612854003906, 57.717132568359375, 14.811649322509766, 15.447616577148438, 33.13995361328125, -11.960281372070312, 55.96923065185547, 17.30561065673828, 53.59895324707031, 25.305519104003906, 7.2340545654296875, 80.08160400390625, 90.697998046875, 16.60723114013672, 36.672454833984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000443.npy"}
|
|
{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 34.26472473144531, "std": 28.675121307373047, "min": -25.827930450439453, "p10": 0.5260257720947279, "median": 31.261930465698242, "p90": 67.79990997314454, "max": 118.71453094482422, "pos_frac": 0.890625, "sample": [21.484085083007812, 2.243682861328125, 8.094169616699219, 63.18749237060547, 2.6780357360839844, 34.95236587524414, 29.55349349975586, 68.51702880859375, 32.970367431640625, -25.827930450439453, 61.48159408569336, 66.12663269042969, 58.482154846191406, 57.385963439941406, 103.24452209472656, 20.344345092773438, 50.294525146484375, 19.749420166015625, 24.29473114013672, 1.8739471435546875, 11.222959518432617, -6.531147003173828, 79.36463928222656, 118.71453094482422, -0.051654815673828125, 53.134849548339844, 46.192901611328125, -0.732330322265625, 19.581340789794922, 41.395538330078125, 41.50707244873047, 102.6728744506836, 56.11856460571289, 6.945009231567383, 11.30816650390625, 80.47266387939453, 23.536529541015625, 45.468910217285156, 62.65077209472656, 20.571205139160156, 13.605033874511719, 59.444915771484375, 51.35508728027344, -6.5710296630859375, 34.80853271484375, 38.960655212402344, 25.122482299804688, 28.315889358520508, 20.338211059570312, 36.73372268676758, 50.55476379394531, 8.063526153564453, 15.273748397827148, 26.84314727783203, 36.62244415283203, -1.07952880859375, 4.284936904907227, 39.365692138671875, 22.183229446411133, 28.145416259765625, 72.41719055175781, -5.756996154785156, 45.7642822265625, 33.472938537597656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000444.npy"}
|
|
{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 31.9174747467041, "std": 27.97288703918457, "min": -17.563079833984375, "p10": 0.14051380157470733, "median": 24.574329376220703, "p90": 64.97780532836914, "max": 126.83767700195312, "pos_frac": 0.90625, "sample": [7.888973236083984, 0.011081695556640625, 27.771682739257812, 62.60906982421875, 19.7242431640625, 37.82054138183594, 6.115091323852539, -4.6718292236328125, 24.401039123535156, 50.039306640625, 14.566120147705078, 72.15343475341797, 3.0619354248046875, -3.4669933319091797, 34.45542907714844, -8.8365478515625, 44.45916748046875, 59.09496307373047, -4.716829299926758, 16.67413330078125, 13.151348114013672, 22.481826782226562, 60.132041931152344, 61.30879211425781, 2.027973175048828, 23.280982971191406, 64.194091796875, 65.81924438476562, 22.651992797851562, 62.13633728027344, 30.82619857788086, 53.9053955078125, 45.017173767089844, 65.31368255615234, 46.24146270751953, 3.9650115966796875, 47.114219665527344, 55.32975769042969, 0.4425220489501953, 68.84660339355469, -0.46526336669921875, 126.83767700195312, 67.31463623046875, 13.185256958007812, 17.76729965209961, 57.062896728515625, 8.827583312988281, 7.164176940917969, 22.925071716308594, 27.5429630279541, 30.324874877929688, 24.74761962890625, 3.685161590576172, 63.565101623535156, 62.3121337890625, 41.59803771972656, 8.489715576171875, 23.857439041137695, 98.05050659179688, 13.834381103515625, 18.960994720458984, 22.24213409423828, 27.1104736328125, -17.563079833984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000445.npy"}
|
|
{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 30.172958374023438, "std": 26.881940841674805, "min": -13.298759460449219, "p10": 1.4689628601074218, "median": 29.502933502197266, "p90": 58.65085220336916, "max": 121.2496337890625, "pos_frac": 0.921875, "sample": [13.610366821289062, -13.298759460449219, 1.5146942138671875, 63.11927795410156, 89.54214477539062, 46.38033676147461, 30.920124053955078, 22.69522476196289, 10.985286712646484, 107.37059020996094, 121.2496337890625, 32.15069580078125, 41.94019317626953, 40.93504333496094, 8.26446533203125, 44.566898345947266, 52.789398193359375, 9.092567443847656, 16.892574310302734, 23.34864044189453, -0.1880035400390625, 1.4493637084960938, 34.27099609375, 8.303855895996094, 2.4298934936523438, 13.145675659179688, 38.94300079345703, 44.20298385620117, 18.512619018554688, 38.52779769897461, 29.451614379882812, 3.935699462890625, 44.92933654785156, 44.57685852050781, 0.38910865783691406, 35.63468933105469, -4.583160400390625, 60.66502380371094, 2.0042877197265625, 65.15460205078125, 50.21403503417969, 3.6501102447509766, 43.46399688720703, 29.55425262451172, 47.046234130859375, 14.660774230957031, 36.914398193359375, 42.91015625, 29.80280303955078, 53.745880126953125, 25.6131591796875, 14.7591552734375, 4.015533447265625, 17.73801040649414, -0.4235496520996094, 6.404502868652344, 12.11587905883789, 18.087478637695312, 53.95111846923828, 29.686296463012695, -3.5627059936523438, 93.66592407226562, 19.581451416015625, 41.64878845214844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000446.npy"}
|
|
{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 31.73101806640625, "std": 28.9771671295166, "min": -10.571380615234375, "p10": -0.9110239028930646, "median": 27.255020141601562, "p90": 67.70272369384766, "max": 142.198974609375, "pos_frac": 0.890625, "sample": [23.254009246826172, 45.12932586669922, 13.962417602539062, 29.331663131713867, 1.7430267333984375, -2.8771915435791016, 54.176910400390625, 43.65802764892578, 23.279598236083984, -7.8604583740234375, 1.4079627990722656, 51.975982666015625, 36.2225341796875, 32.6088981628418, 62.270965576171875, 27.337493896484375, 87.05473327636719, -2.6533565521240234, 142.198974609375, 68.11370849609375, 13.79893684387207, 101.5321044921875, 48.98455047607422, 47.74756622314453, 14.816879272460938, 28.261497497558594, 33.00385665893555, 2.647014617919922, 12.561882019042969, 23.744239807128906, 4.1564788818359375, -6.859260559082031, 34.736175537109375, -10.571380615234375, 50.07593536376953, 10.233442306518555, 59.88250732421875, 27.17254638671875, 14.908279418945312, 1.52276611328125, 16.016921997070312, 0.9549827575683594, 39.24802780151367, -1.7107410430908203, 66.74375915527344, 16.303184509277344, 26.507675170898438, 16.136247634887695, 13.635009765625, 16.0809268951416, 24.559906005859375, 39.07069396972656, 71.97492980957031, 33.26301574707031, 14.019523620605469, 51.053985595703125, 59.551265716552734, 10.520149230957031, 74.5093765258789, 83.05807495117188, 30.220924377441406, 48.14190673828125, 43.238407135009766, -5.00433349609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000447.npy"}
|
|
{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 32.101654052734375, "std": 25.543386459350586, "min": -23.457015991210938, "p10": 5.658212471008301, "median": 24.609575271606445, "p90": 74.5552131652832, "max": 88.93195343017578, "pos_frac": 0.9375, "sample": [24.679527282714844, 17.100147247314453, 20.152095794677734, 49.39787292480469, 79.45246124267578, -9.25311279296875, 21.310279846191406, 17.69762420654297, 24.21918487548828, 26.230621337890625, 41.496986389160156, 14.856307983398438, 32.205047607421875, 13.656715393066406, 58.92243194580078, 46.22844696044922, 21.135732650756836, 44.479278564453125, 74.73652648925781, 21.981910705566406, -23.457015991210938, 25.032176971435547, 66.72417449951172, 22.172775268554688, 24.539623260498047, 5.749114990234375, 27.130474090576172, 16.51183319091797, 64.75958251953125, 78.71743774414062, 36.15985870361328, 57.71204376220703, 5.716535568237305, 74.13214874267578, 82.25837707519531, -2.892976760864258, 23.92291259765625, 13.881942749023438, 5.633216857910156, 29.163330078125, 17.960861206054688, 77.09286499023438, 16.901229858398438, 17.269882202148438, 88.93195343017578, 30.30401611328125, 10.8302001953125, 18.783044815063477, 13.898025512695312, 51.694190979003906, 36.34899139404297, -4.905487060546875, 69.07456970214844, 9.3963623046875, 44.09521484375, 63.96001434326172, 1.6326980590820312, 16.08202362060547, 25.74495506286621, 4.23748779296875, 50.418460845947266, 25.188888549804688, 19.399871826171875, 75.9119644165039], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000448.npy"}
|
|
{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 31.415071487426758, "std": 30.40106201171875, "min": -49.0272216796875, "p10": -0.7997825622558578, "median": 30.896163940429688, "p90": 75.41165924072267, "max": 119.75822448730469, "pos_frac": 0.890625, "sample": [3.767852783203125, 20.682586669921875, 32.684906005859375, 5.5046844482421875, 76.88174438476562, 40.30268096923828, 16.067001342773438, 12.130308151245117, 82.03456115722656, 39.38831329345703, 66.97078704833984, 19.90231704711914, 1.4271316528320312, 0.8472137451171875, 30.94183349609375, 43.591983795166016, 37.385948181152344, 17.187339782714844, 24.92601776123047, 3.420360565185547, 62.085479736328125, 20.376686096191406, 49.82801055908203, 32.497802734375, 14.626167297363281, 86.26348876953125, 30.95447540283203, 7.376142501831055, 30.850494384765625, 39.753841400146484, 42.18503952026367, 3.0599441528320312, 15.026397705078125, 26.494369506835938, 41.495849609375, 95.50808715820312, -14.660728454589844, 19.826980590820312, -6.506500244140625, -6.80029296875, 119.75822448730469, 43.758567810058594, -13.078193664550781, 57.194190979003906, 35.205909729003906, 15.967864990234375, -7.9764404296875, -49.0272216796875, 29.030712127685547, 71.98146057128906, 44.30542755126953, 79.93643951416016, 36.142059326171875, 11.916738510131836, 10.625518798828125, 43.204612731933594, 55.96124267578125, 102.54569244384766, 42.157997131347656, 51.78765869140625, 18.688377380371094, -1.5056381225585938, 34.01060485839844, 11.68564224243164], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000449.npy"}
|
|
{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 34.53790283203125, "std": 26.31365394592285, "min": -4.369209289550781, "p10": 3.9715761184692404, "median": 29.440296173095703, "p90": 67.05525360107423, "max": 121.40519714355469, "pos_frac": 0.953125, "sample": [26.9737548828125, 21.34979248046875, 44.32029724121094, 45.84419250488281, 25.21075439453125, 97.64665222167969, 65.85295104980469, -3.1928558349609375, 11.81976318359375, 1.0396347045898438, 22.18413543701172, 81.55931091308594, 70.58348083496094, 12.300315856933594, 51.21868133544922, 50.4974365234375, 35.38923645019531, -1.4954833984375, 30.69256591796875, 52.531394958496094, 33.17259216308594, 14.64202880859375, 11.073890686035156, 58.32763671875, 33.18577575683594, 20.797382354736328, 51.97344970703125, 69.71493530273438, 25.522300720214844, 61.8212890625, 43.37770080566406, 67.57052612304688, 54.02172088623047, 17.9765625, 40.979026794433594, 2.9325428009033203, 16.435123443603516, 54.45494842529297, 28.188026428222656, 24.48107147216797, 86.95774841308594, 0.389923095703125, 14.173919677734375, 65.4326400756836, 7.106800079345703, 23.921409606933594, -4.369209289550781, 42.5980224609375, 45.13397979736328, 8.734527587890625, 16.209388732910156, 16.105148315429688, 41.13970184326172, 55.6148681640625, 22.932416915893555, 3.0087623596191406, 60.00093078613281, 36.818138122558594, 37.67424774169922, 12.715492248535156, 7.290428161621094, 121.40519714355469, 10.238632202148438, 6.218141555786133], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000450.npy"}
|
|
{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 29.465295791625977, "std": 26.04265785217285, "min": -38.6256103515625, "p10": 0.4771385192871095, "median": 24.735360145568848, "p90": 64.72141647338867, "max": 95.07046508789062, "pos_frac": 0.921875, "sample": [9.983291625976562, 48.83152770996094, 24.444915771484375, 63.65802764892578, 53.294654846191406, 18.891403198242188, 3.9172210693359375, 58.12037658691406, 8.957412719726562, 84.12345886230469, 40.765716552734375, 95.07046508789062, 45.1068115234375, 60.35862731933594, 20.029075622558594, 48.021217346191406, 6.833829879760742, -38.6256103515625, 27.653602600097656, 0.575164794921875, 24.145896911621094, 17.36126708984375, 57.89715576171875, 66.55612182617188, -9.262985229492188, 11.828536987304688, 13.119222640991211, 33.165706634521484, 17.78774642944336, 67.77043151855469, 29.665203094482422, 82.498046875, 51.93547821044922, 25.012542724609375, 52.82306671142578, 28.432769775390625, 33.40557861328125, 13.858993530273438, 33.38391876220703, -3.887664794921875, -19.1916446685791, 0.11912155151367188, 65.17715454101562, 18.121322631835938, 34.503211975097656, 24.45817756652832, 24.15968132019043, 42.75970458984375, 37.153175354003906, 11.641080856323242, 47.38233184814453, 0.43512725830078125, 0.9960746765136719, 24.37360382080078, 39.016441345214844, 33.158111572265625, 16.41509246826172, 3.3024215698242188, 23.87474250793457, 2.1970558166503906, 74.37548828125, -7.339141845703125, 22.03972625732422, 39.14253616333008], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000451.npy"}
|
|
{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 30.134782791137695, "std": 22.0466365814209, "min": -4.161996841430664, "p10": 6.748517227172852, "median": 28.01622772216797, "p90": 58.3771224975586, "max": 121.65464782714844, "pos_frac": 0.984375, "sample": [19.193878173828125, 30.163841247558594, 59.77197265625, 18.757186889648438, 17.05280303955078, 31.644962310791016, 30.22182273864746, 36.91741943359375, 32.09228515625, 11.604270935058594, 83.830810546875, 34.01471710205078, 9.323318481445312, 4.419153213500977, 59.51911926269531, 17.084609985351562, 31.889509201049805, 45.724342346191406, 37.134864807128906, 4.101774215698242, 43.297760009765625, 9.954566955566406, 74.671875, 25.678955078125, 17.825973510742188, 54.3214111328125, 21.321176528930664, -4.161996841430664, 64.3399658203125, 17.139686584472656, 5.4506072998046875, 50.887451171875, 10.956104278564453, 28.745391845703125, 55.71246337890625, 29.77353286743164, 17.56174087524414, 35.15658950805664, 5.026496887207031, 25.04772186279297, 6.849857330322266, 5.8048858642578125, 33.5859375, 37.297637939453125, 9.432991027832031, 34.042938232421875, 22.1962890625, 28.745941162109375, 121.65464782714844, 21.7476806640625, 35.45330047607422, 29.30978775024414, 41.39874267578125, 24.776397705078125, 20.278518676757812, 11.585563659667969, 15.416519165039062, 79.11802673339844, 6.705085754394531, 37.29829406738281, 27.287063598632812, 41.27345275878906, 9.016998291015625, 25.17949104309082], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000452.npy"}
|
|
{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 29.085241317749023, "std": 25.585979461669922, "min": -17.163497924804688, "p10": 1.6516025543212907, "median": 22.94738006591797, "p90": 63.12253494262696, "max": 104.78207397460938, "pos_frac": 0.9375, "sample": [45.76201629638672, 61.28010559082031, 18.436935424804688, 9.926237106323242, 71.77098846435547, 6.7372589111328125, 4.882667541503906, 53.729827880859375, 13.025899887084961, 14.552841186523438, 42.22303771972656, 25.84673309326172, 37.40110397338867, 21.52789306640625, 53.92232894897461, 19.5067138671875, 35.83775329589844, 30.854406356811523, 6.127889633178711, 9.570724487304688, 28.960205078125, -8.40695571899414, 12.822883605957031, 32.62895965576172, 85.05792236328125, 17.669189453125, 28.112472534179688, 37.23273468017578, -11.960418701171875, 56.73724365234375, 14.487964630126953, 7.310089111328125, 54.75642395019531, 64.30835723876953, 63.912147521972656, 21.30789566040039, 0.7525787353515625, 8.98782730102539, 104.78207397460938, 60.35789489746094, 3.9046783447265625, -1.4800968170166016, 88.01313781738281, 8.621002197265625, 25.539581298828125, 3.1732635498046875, 24.366867065429688, 18.712135314941406, 32.976158142089844, 58.85124969482422, -17.163497924804688, 49.820526123046875, 0.9994621276855469, 33.25894546508789, 35.12945556640625, 12.533113479614258, 20.874313354492188, 13.352880477905273, 66.75457763671875, 59.27079772949219, 7.222164154052734, 33.953704833984375, 0.7475814819335938, 19.282691955566406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000453.npy"}
|
|
{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 32.31932830810547, "std": 26.018522262573242, "min": -18.416839599609375, "p10": 0.6077869415283209, "median": 25.710498809814453, "p90": 73.8269874572754, "max": 100.41751098632812, "pos_frac": 0.921875, "sample": [1.1672897338867188, 23.782196044921875, 73.77877807617188, 46.50996398925781, 39.86847686767578, 24.374900817871094, 21.30584716796875, 26.062149047851562, 9.886470794677734, 32.082420349121094, -18.416839599609375, 13.815483093261719, 25.58344268798828, 82.5283203125, 22.26168441772461, 28.449050903320312, 16.847505569458008, 44.23614501953125, 0.3680000305175781, 25.837554931640625, 32.219573974609375, 58.72084045410156, 20.75299072265625, 74.6828384399414, 36.272682189941406, 38.47045135498047, 24.524185180664062, 6.719297409057617, 63.68861389160156, 48.652931213378906, 90.84257507324219, 6.818145751953125, 53.44004821777344, 24.103988647460938, 73.84764862060547, 21.589576721191406, 24.871604919433594, 17.581497192382812, 68.35011291503906, 35.77753448486328, 72.60481262207031, 30.095497131347656, 6.2510528564453125, 41.90676498413086, 23.007553100585938, 76.22222900390625, 21.655303955078125, 17.432899475097656, 44.918846130371094, 0.19397735595703125, 21.801742553710938, 100.41751098632812, -10.520973205566406, 46.425392150878906, 12.438112258911133, 36.49360656738281, -2.8759307861328125, -9.535568237304688, 29.381736755371094, 18.772750854492188, 36.377716064453125, -1.4940032958984375, 78.43634033203125, 15.77362060546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000454.npy"}
|
|
{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 37.20379638671875, "std": 26.521596908569336, "min": -26.825836181640625, "p10": 2.3324262619018556, "median": 37.252262115478516, "p90": 76.84151382446291, "max": 93.70462036132812, "pos_frac": 0.96875, "sample": [18.737071990966797, 27.565261840820312, 47.944236755371094, 80.86083984375, 8.166744232177734, 43.258270263671875, 55.37879943847656, 1.1786937713623047, 78.67581176757812, 2.192049026489258, 67.7850341796875, 33.013301849365234, 34.11946105957031, 16.5115966796875, 81.6773681640625, -26.825836181640625, 80.2081298828125, 91.47824096679688, 20.61573028564453, -9.356147766113281, 1.7310562133789062, 60.357383728027344, 18.959548950195312, 19.022127151489258, 72.56148529052734, 78.82752990722656, 44.37101364135742, 4.657585144042969, 15.828277587890625, 13.074386596679688, 48.6617546081543, 48.65846633911133, 21.804412841796875, 27.104263305664062, 17.548681259155273, 8.298248291015625, 22.659015655517578, 54.538902282714844, 2.65997314453125, 7.675376892089844, 44.82029342651367, 34.66143798828125, 66.99505615234375, 39.42748260498047, 0.3526458740234375, 28.456336975097656, 0.7443561553955078, 43.06805419921875, 45.84064483642578, 30.413436889648438, 62.596763610839844, 35.07704162597656, 59.44419860839844, 54.70497131347656, 40.99761199951172, 28.255847930908203, 17.499980926513672, 49.86378479003906, 58.47323989868164, 52.24974060058594, 93.70462036132812, 48.15660858154297, 47.0210075378418, 56.033935546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000455.npy"}
|
|
{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 37.51462936401367, "std": 28.913846969604492, "min": -13.976806640625, "p10": 4.456723976135255, "median": 34.63663864135742, "p90": 81.35093231201172, "max": 101.62435913085938, "pos_frac": 0.9375, "sample": [14.190990447998047, -5.2823486328125, 80.31143188476562, 32.870269775390625, 15.349319458007812, 47.89302062988281, 19.98245620727539, -6.426185607910156, 33.01219940185547, 81.79643249511719, 39.936553955078125, 4.183294296264648, 12.477005004882812, 89.42861938476562, 28.318344116210938, 50.815834045410156, 38.47833251953125, 61.21478271484375, 5.712789535522461, 34.331268310546875, 6.096771240234375, 13.245922088623047, 5.0947265625, 73.97064208984375, 10.92184066772461, 60.763885498046875, 61.11482238769531, 56.48090362548828, 68.33953857421875, 47.79327392578125, 36.793678283691406, 85.12008666992188, 75.21076202392578, 23.97555923461914, -13.976806640625, -0.335693359375, 17.990266799926758, 45.387428283691406, 78.02250671386719, 0.8581085205078125, 44.336448669433594, 37.87107849121094, 23.086631774902344, 9.524137496948242, 27.429065704345703, 16.25042724609375, 9.498517990112305, 25.76153564453125, 101.62435913085938, 6.343544006347656, 94.92825317382812, 61.502166748046875, 48.86668395996094, 11.405403137207031, 22.648372650146484, 44.02361297607422, 55.056373596191406, 26.590164184570312, 38.741180419921875, 3.0373764038085938, 34.94200897216797, 83.49264526367188, 94.24044799804688, 48.273216247558594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000456.npy"}
|
|
{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 33.051368713378906, "std": 24.597646713256836, "min": -25.799957275390625, "p10": 6.163788414001466, "median": 30.566932678222656, "p90": 62.195135498046874, "max": 102.76638793945312, "pos_frac": 0.9375, "sample": [28.021617889404297, 54.267242431640625, 63.92877197265625, 18.760337829589844, 26.345169067382812, 14.611665725708008, 45.76805114746094, 51.94842529296875, 39.772178649902344, 23.576385498046875, 102.76638793945312, 83.98298645019531, 14.190570831298828, -8.592571258544922, 52.959747314453125, 36.92144775390625, 40.791526794433594, 11.585437774658203, 56.98389434814453, 48.81513977050781, 17.314414978027344, 7.501056671142578, 17.41301727294922, 49.33818054199219, 17.923065185546875, 41.24567413330078, -3.619903564453125, 8.436016082763672, 81.10616302490234, 35.8990478515625, 42.835113525390625, 21.63775634765625, 8.194339752197266, 52.18004608154297, 44.74797821044922, 0.2057342529296875, 66.65180969238281, 31.92559051513672, 27.1773681640625, 36.671600341796875, 27.860702514648438, 28.986690521240234, 44.83995056152344, -10.267223358154297, 5.590673446655273, 7.913948059082031, 62.38628387451172, 32.13549041748047, 37.08586883544922, 61.749122619628906, 23.522315979003906, 51.412715911865234, 34.32111358642578, 94.20541381835938, -25.799957275390625, 25.640342712402344, 49.93170928955078, 14.477462768554688, 2.834423065185547, 24.399452209472656, 45.39238739013672, 24.03191375732422, 29.208274841308594, 11.239913940429688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000457.npy"}
|
|
{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 38.03219223022461, "std": 32.63706970214844, "min": -21.762176513671875, "p10": -0.1276874542236316, "median": 39.85420227050781, "p90": 73.12674102783203, "max": 152.34742736816406, "pos_frac": 0.890625, "sample": [12.004779815673828, 2.8549118041992188, 52.30049133300781, 55.70151901245117, 26.578853607177734, -21.762176513671875, 139.55844116210938, -6.09046745300293, 58.96630859375, 53.862335205078125, 4.943807601928711, 28.78095245361328, 94.13729858398438, 29.510028839111328, 110.61538696289062, -16.82733917236328, 17.304386138916016, 1.1288032531738281, 50.801883697509766, 40.936004638671875, 46.910560607910156, 72.87847900390625, 38.608360290527344, 73.23313903808594, 46.885009765625, 43.28025817871094, 13.758316040039062, 44.427001953125, 34.18341064453125, 45.414207458496094, 44.017425537109375, 88.85408782958984, 43.207855224609375, 50.69416046142578, 40.4561767578125, -4.5373382568359375, 47.65773010253906, 14.604171752929688, 45.31629180908203, 76.21771240234375, 39.252227783203125, 30.772201538085938, -8.602119445800781, -0.6661834716796875, 34.22229766845703, 11.000076293945312, 43.226810455322266, 3.936676025390625, 27.073638916015625, 152.34742736816406, 18.67913818359375, 15.439868927001953, 28.363210678100586, 28.410852432250977, 15.386459350585938, 41.642730712890625, 55.94599151611328, 55.43564987182617, 22.922653198242188, 40.60783767700195, 38.06170654296875, 46.41373062133789, -10.262954711914062, 63.077110290527344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000458.npy"}
|
|
{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 28.914405822753906, "std": 23.701152801513672, "min": -28.20240020751953, "p10": 1.7999582290649414, "median": 28.115949630737305, "p90": 61.58744812011719, "max": 83.88978576660156, "pos_frac": 0.96875, "sample": [11.94476318359375, 8.003776550292969, 3.3608474731445312, 44.31231689453125, 19.84991455078125, 32.5023078918457, 26.350799560546875, 8.621139526367188, 38.63005828857422, 34.205970764160156, 29.760910034179688, -2.5316619873046875, 48.55615234375, 34.120819091796875, 0.3209056854248047, 13.126296997070312, 51.965492248535156, 18.15026092529297, 1.3859939575195312, 83.88978576660156, 1.3308181762695312, 79.43084716796875, 30.333221435546875, 10.315620422363281, 14.919784545898438, 34.38861846923828, 50.575008392333984, 3.4180450439453125, 10.746112823486328, 34.11378479003906, 34.63698196411133, 40.60511016845703, 69.88606262207031, -28.20240020751953, 46.82234191894531, 1.8747997283935547, 69.72428894042969, 29.369457244873047, 1.76788330078125, 21.975067138671875, 26.862442016601562, 48.429237365722656, 20.801483154296875, 9.59967041015625, 20.524383544921875, 47.94171905517578, 4.039314270019531, 58.52937316894531, 15.078914642333984, 61.46074295043945, 62.23253631591797, 44.941261291503906, 7.786994934082031, 24.195510864257812, 11.152732849121094, 54.3887939453125, 5.201560974121094, 0.7782878875732422, 61.64175033569336, 76.14968872070312, 54.985191345214844, 30.031982421875, 3.5632266998291016, 35.6468505859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000459.npy"}
|
|
{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 32.56512451171875, "std": 25.778074264526367, "min": -12.71712875366211, "p10": -1.0604377746582014, "median": 34.04936981201172, "p90": 64.51049270629883, "max": 109.62905883789062, "pos_frac": 0.890625, "sample": [39.79206848144531, 56.30812072753906, 31.513885498046875, 30.92587661743164, 22.825355529785156, 61.32693862915039, 62.35296630859375, 50.090240478515625, 11.452262878417969, 13.435474395751953, 16.80742645263672, 46.094383239746094, -12.71712875366211, 29.610992431640625, 3.580249786376953, 36.730743408203125, 19.657211303710938, 11.94549560546875, 1.8625030517578125, 70.75096130371094, 13.293010711669922, 34.556419372558594, 79.94682312011719, 9.580474853515625, 64.97972106933594, 36.65340805053711, 109.62905883789062, 42.56437683105469, 36.47569274902344, 32.941558837890625, 34.448760986328125, -2.1483688354492188, 58.134979248046875, 2.9430084228515625, 21.958663940429688, 89.51553344726562, -4.703874588012695, 40.226905822753906, 47.76408386230469, 52.818084716796875, 43.053199768066406, 33.83136749267578, 6.896888732910156, 40.9849739074707, 69.42545318603516, -4.93391227722168, 28.507354736328125, 48.61537170410156, 19.474098205566406, 39.40122985839844, 11.223350524902344, 0.45778656005859375, 7.733558654785156, -1.7111053466796875, -9.343658447265625, 42.51335906982422, 41.75773620605469, 19.118370056152344, -3.3088836669921875, 19.073997497558594, 63.415626525878906, 34.267372131347656, 51.61867904663086, 76.17129516601562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000460.npy"}
|
|
{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 27.81532859802246, "std": 28.51862144470215, "min": -17.881759643554688, "p10": -0.8643449783325196, "median": 24.851158142089844, "p90": 54.906156921386724, "max": 134.0843505859375, "pos_frac": 0.875, "sample": [27.682525634765625, 3.9995975494384766, 6.770637512207031, 11.829444885253906, 29.44314193725586, 43.8443603515625, 6.873870849609375, 55.159034729003906, 5.395263671875, 11.908309936523438, 25.009246826171875, -17.30498504638672, 33.41563415527344, 34.191368103027344, 134.0843505859375, 39.71587371826172, 76.08619689941406, -0.8391094207763672, -0.8751602172851562, 12.593246459960938, 13.1365966796875, 25.645402908325195, 54.31610870361328, 73.78727722167969, 39.934242248535156, 8.946136474609375, 50.51829528808594, 129.6348419189453, 25.440561294555664, 21.307098388671875, 49.4793701171875, 23.75395393371582, 51.407779693603516, 8.131935119628906, 39.9178466796875, -2.6045684814453125, 9.979705810546875, 4.01416015625, -17.881759643554688, 24.693069458007812, 69.55545043945312, -5.04887580871582, 35.18473815917969, 29.769981384277344, -6.232931137084961, 4.36981201171875, 25.20418930053711, 52.766204833984375, 13.050430297851562, 13.539337158203125, 23.247421264648438, 8.993600845336914, 21.989933013916016, 6.138702392578125, 19.74422836303711, -10.533905029296875, 4.75225830078125, 29.698537826538086, 42.771690368652344, 61.66862487792969, 38.67424011230469, 45.83118438720703, 48.68650817871094, 33.81871795654297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000461.npy"}
|
|
{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 26.86299705505371, "std": 25.792369842529297, "min": -22.348979949951172, "p10": -0.24614410400390557, "median": 20.27871322631836, "p90": 70.00483627319336, "max": 85.3882064819336, "pos_frac": 0.890625, "sample": [22.12017822265625, -0.5392227172851562, -5.077178955078125, 24.197525024414062, 77.29187774658203, 36.402069091796875, 19.577150344848633, 0.9583663940429688, 17.402328491210938, 11.845718383789062, 7.9623870849609375, 8.505889892578125, 19.56015396118164, -9.60603141784668, 35.299949645996094, 69.16268920898438, 70.28184509277344, 44.69921875, -10.10565185546875, 43.43589401245117, 0.43770599365234375, 85.3882064819336, 32.25101089477539, 2.7854156494140625, 25.563152313232422, 18.01123046875, 14.908378601074219, 37.5452880859375, 2.7500858306884766, 16.755001068115234, 83.4556655883789, 10.312793731689453, 14.50838851928711, 32.33351135253906, 20.795989990234375, 73.4481430053711, 9.198898315429688, -0.8009052276611328, 1.7895050048828125, 79.25907897949219, 12.459722518920898, 18.621444702148438, 36.20200729370117, 38.96106719970703, 37.45348358154297, 21.055030822753906, 47.696044921875, 74.4595947265625, 12.648170471191406, 45.398521423339844, -9.95330810546875, 19.761436462402344, 69.35848236083984, 60.80208206176758, 44.42725372314453, 9.620811462402344, 23.03777313232422, 24.0302791595459, 18.741470336914062, 58.133262634277344, 7.905364990234375, 22.46332550048828, -22.348979949951172, 4.225757598876953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000462.npy"}
|
|
{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 33.929237365722656, "std": 24.047834396362305, "min": -25.2232723236084, "p10": 3.096006774902345, "median": 33.921653747558594, "p90": 60.983084869384776, "max": 97.442138671875, "pos_frac": 0.921875, "sample": [2.587554931640625, 67.21176147460938, -25.2232723236084, -4.634788513183594, 49.370567321777344, 39.961883544921875, 13.771778106689453, 27.432899475097656, 15.98880386352539, 11.463348388671875, 33.207069396972656, 28.51348114013672, 43.81004333496094, 76.47747802734375, 24.98670196533203, 16.217164993286133, 23.844528198242188, 25.964235305786133, 41.252342224121094, 36.647945404052734, 46.451988220214844, 97.442138671875, 28.071495056152344, 39.68681335449219, 15.202018737792969, 82.5810546875, -2.9328956604003906, 18.103668212890625, 45.280792236328125, 54.674232482910156, 17.850006103515625, 34.897560119628906, -3.6511001586914062, 4.2823944091796875, 47.652099609375, -2.593597412109375, 18.473907470703125, 24.232406616210938, 43.21308135986328, 17.736164093017578, 34.08624267578125, 57.90570068359375, 50.238487243652344, 38.08296203613281, 54.383544921875, 44.59825134277344, 22.076255798339844, 19.85430145263672, 48.909095764160156, 33.75706481933594, 62.301963806152344, 82.59451293945312, 39.1777458190918, 32.43257141113281, 24.014320373535156, 2.4518280029296875, 49.602909088134766, 4.5832977294921875, 34.79118347167969, 57.74217987060547, 35.17169189453125, 53.91741180419922, 92.52934265136719, 20.762496948242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000463.npy"}
|
|
{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 34.593727111816406, "std": 29.905498504638672, "min": -33.46949005126953, "p10": 0.7120391845703142, "median": 29.09914207458496, "p90": 71.723583984375, "max": 127.978515625, "pos_frac": 0.890625, "sample": [-0.00213623046875, 44.269073486328125, 47.25923156738281, 3.0769271850585938, 61.56658172607422, 33.31689453125, 18.980438232421875, 15.116300582885742, 20.435516357421875, 42.732666015625, 18.3197021484375, 81.00225830078125, 12.57464599609375, 40.8863525390625, 55.98579406738281, 58.50590515136719, 25.826919555664062, 36.3248176574707, -2.908355712890625, 72.02810668945312, 30.919689178466797, 42.334693908691406, 42.33069610595703, 3.85040283203125, 60.38267517089844, -6.69598388671875, 43.48932647705078, 120.31324768066406, 3.3758773803710938, 22.446327209472656, 17.6651611328125, 77.29057312011719, 27.278594970703125, 8.903858184814453, -0.40301513671875, 42.659908294677734, 42.46409606933594, 18.17325210571289, -4.417449951171875, 22.882781982421875, 26.697914123535156, 23.36342430114746, 71.01303100585938, 11.334653854370117, 16.233463287353516, 54.66116714477539, -33.46949005126953, 35.503684997558594, 7.609764099121094, 53.223602294921875, 44.7186279296875, 88.47660827636719, 127.978515625, 63.76002502441406, 21.597213745117188, 62.14696502685547, 80.92543029785156, -2.4341506958007812, 55.07243347167969, 49.39668273925781, 26.896881103515625, 2.378448486328125, 17.309661865234375, 9.06158447265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000464.npy"}
|
|
{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 37.622222900390625, "std": 25.430631637573242, "min": -10.123451232910156, "p10": 5.885792541503908, "median": 33.624732971191406, "p90": 71.749853515625, "max": 97.96784973144531, "pos_frac": 0.953125, "sample": [49.09599304199219, 73.2752685546875, 28.51665496826172, 21.247093200683594, 9.791635513305664, 39.29869079589844, 34.91258239746094, 33.143768310546875, 11.16524887084961, 31.375694274902344, 24.484268188476562, 51.42681884765625, 4.636348724365234, 10.210426330566406, 63.56787109375, 97.96784973144531, 29.22525978088379, 54.49702072143555, 34.818359375, 31.873184204101562, 12.783409118652344, 54.70854568481445, 92.74154663085938, 33.6077880859375, 85.5406494140625, 2.0242843627929688, 91.25798034667969, 18.55218505859375, 21.20966339111328, 27.0711669921875, 16.916501998901367, 71.797607421875, 40.50733184814453, 35.847755432128906, 28.8526668548584, 55.18634033203125, -10.123451232910156, 45.52445983886719, 26.424243927001953, 47.7342529296875, 7.227836608886719, 31.932693481445312, 33.119361877441406, -7.1622161865234375, 67.92793273925781, 5.310630798339844, 39.66534423828125, 48.86627960205078, 14.074825286865234, 40.0515251159668, 71.638427734375, 2.4873600006103516, 46.70941162109375, -4.482887268066406, 63.50152587890625, 88.08541870117188, 19.564777374267578, 35.46464538574219, 30.898513793945312, 51.50879669189453, 23.947891235351562, 61.4095458984375, 69.73804473876953, 33.64167785644531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000465.npy"}
|
|
{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 34.00209045410156, "std": 23.88724136352539, "min": -20.9278564453125, "p10": 5.35496826171875, "median": 33.67691993713379, "p90": 67.63508377075196, "max": 84.46217346191406, "pos_frac": 0.9375, "sample": [52.75814437866211, 24.87525177001953, 22.42383575439453, 16.585174560546875, 33.4176025390625, 26.976768493652344, 18.91462516784668, 57.33695983886719, 39.01976013183594, 10.982646942138672, 30.200103759765625, 53.536705017089844, -20.9278564453125, 57.612762451171875, 16.390888214111328, 48.172874450683594, 37.95844268798828, 13.388439178466797, 32.3870849609375, 24.19963836669922, 10.361526489257812, 57.18882751464844, 52.924232482910156, 67.20094299316406, 70.26954650878906, 30.760761260986328, 62.698307037353516, 73.41537475585938, 38.55647277832031, 40.58006286621094, 25.291900634765625, 33.93623733520508, 40.99681854248047, 84.46217346191406, 19.94689178466797, 63.67509460449219, 42.80661392211914, 45.617767333984375, 17.371658325195312, 22.594139099121094, 1.861989974975586, 3.783489227294922, 27.734878540039062, 75.41008758544922, 39.54823303222656, 67.8211441040039, 17.304637908935547, 11.515731811523438, 5.77178955078125, 34.862403869628906, 39.30790710449219, 22.395221710205078, 39.5546875, 26.234092712402344, 40.974395751953125, 78.49859619140625, -13.916389465332031, 16.72906494140625, 77.65171813964844, 5.17633056640625, 50.048927307128906, -7.586029052734375, 34.57239532470703, -15.986831665039062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000466.npy"}
|
|
{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 34.46977996826172, "std": 28.059871673583984, "min": -13.036460876464844, "p10": 3.222737503051759, "median": 28.515260696411133, "p90": 75.22916107177736, "max": 126.71263122558594, "pos_frac": 0.921875, "sample": [51.38999938964844, 50.250518798828125, 126.71263122558594, 14.063018798828125, -7.001045227050781, -9.145172119140625, 22.71346664428711, 38.835235595703125, 35.616600036621094, 54.46742248535156, 76.83523559570312, 61.193206787109375, 36.49995422363281, -0.5970973968505859, 80.86062622070312, 51.478736877441406, 22.18663787841797, 5.969337463378906, 92.55773162841797, 39.044769287109375, 41.74391174316406, 7.163066864013672, 27.450706481933594, 80.99784851074219, 14.01995849609375, 17.319801330566406, 22.72820281982422, 14.104110717773438, 73.03697204589844, 35.232818603515625, 35.16838073730469, 2.762248992919922, 41.01275634765625, 14.819778442382812, 69.30880737304688, 20.927947998046875, 76.16867065429688, 22.31954574584961, 19.991775512695312, 4.297210693359375, 34.619384765625, 16.56775665283203, 34.30069351196289, 14.796348571777344, 39.50756072998047, 19.468734741210938, 9.292861938476562, -2.4877548217773438, 1.3106689453125, 60.796958923339844, 56.75653076171875, 64.75720977783203, 8.055496215820312, 29.579814910888672, 50.917724609375, 25.945030212402344, 49.600006103515625, 18.550872802734375, 22.756813049316406, 96.24759674072266, -13.036460876464844, 46.03938674926758, 13.912757873535156, 23.303466796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000467.npy"}
|
|
{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 32.051353454589844, "std": 26.79055404663086, "min": -10.897687911987305, "p10": 2.9351116180419936, "median": 27.947189331054688, "p90": 76.59086685180667, "max": 107.06901550292969, "pos_frac": 0.921875, "sample": [47.37403869628906, -10.897687911987305, 59.838043212890625, 17.051666259765625, 27.545867919921875, 34.871063232421875, 89.47651672363281, 34.07688903808594, 81.30435180664062, 62.154571533203125, 69.4819564819336, 7.5366363525390625, 31.662155151367188, 19.989356994628906, -6.967061996459961, 8.306640625, 30.109607696533203, 9.447893142700195, 35.48223876953125, 53.19123840332031, 26.301788330078125, 7.02165412902832, -4.260231018066406, 27.079872131347656, 32.587867736816406, 12.135986328125, 44.98766326904297, 89.53636169433594, 45.704345703125, 4.54925537109375, 0.2938671112060547, 11.103538513183594, 52.73918533325195, 27.56391143798828, 34.88214111328125, 16.048812866210938, 26.032798767089844, 31.80545425415039, 79.63754272460938, 28.330467224121094, 95.34387969970703, 22.648630142211914, 14.518020629882812, 22.132766723632812, 34.83854675292969, 22.3896484375, 46.015289306640625, 31.400314331054688, 11.061386108398438, 107.06901550292969, 33.82301330566406, 39.5799560546875, 2.243335723876953, -0.29131317138671875, 8.739151000976562, 39.70549774169922, 41.94384765625, 37.630035400390625, 89.51520538330078, -6.86749267578125, 27.402053833007812, 6.931606292724609, 15.511962890625, 12.884025573730469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000468.npy"}
|
|
{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 33.17765426635742, "std": 27.324230194091797, "min": -14.433998107910156, "p10": 0.01942462921142818, "median": 28.768460273742676, "p90": 67.58548736572266, "max": 126.06108856201172, "pos_frac": 0.890625, "sample": [50.43603515625, 34.44407653808594, 26.681119918823242, 36.59138488769531, 16.759002685546875, 4.065900802612305, 23.327930450439453, 21.781471252441406, 21.98193359375, -8.945960998535156, 59.154510498046875, 41.937347412109375, 29.39613151550293, -2.11541748046875, 68.87841796875, 32.985557556152344, 67.71148681640625, 41.359283447265625, 36.117164611816406, 58.8853759765625, 77.6664047241211, 19.467567443847656, 42.17597579956055, 14.218917846679688, 83.55841064453125, 3.153533935546875, 48.671974182128906, 67.29148864746094, 90.35675048828125, 2.714691162109375, 27.631763458251953, 25.223602294921875, 54.790077209472656, 2.365468978881836, 23.084075927734375, -8.909769058227539, 29.38611602783203, -4.0488128662109375, 7.732475280761719, 15.113357543945312, 67.20143127441406, -11.456741333007812, 54.128074645996094, 19.62313461303711, 27.426361083984375, 41.11274719238281, 28.15080451965332, 34.71693420410156, 60.11683654785156, 3.9286632537841797, 25.195571899414062, 69.70828247070312, -0.98602294921875, 126.06108856201172, 61.442291259765625, 65.65177154541016, -14.433998107910156, 40.23100662231445, 21.91338348388672, 23.385391235351562, 30.056350708007812, 14.062576293945312, 30.912673950195312, 22.144454956054688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000469.npy"}
|
|
{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 38.34840393066406, "std": 24.011594772338867, "min": -5.076141357421875, "p10": 8.655946350097658, "median": 38.78763961791992, "p90": 66.63794708251953, "max": 102.77564239501953, "pos_frac": 0.953125, "sample": [24.62181854248047, 54.27960968017578, 22.305641174316406, 42.73301696777344, 53.88334655761719, 29.432518005371094, 38.95899200439453, 42.44816589355469, 17.753402709960938, 33.87928009033203, 54.48805236816406, 26.350852966308594, 38.63288116455078, 13.179054260253906, 49.864070892333984, 34.629974365234375, 12.06294059753418, 99.90216064453125, -5.076141357421875, 35.777565002441406, 44.549827575683594, 6.7599639892578125, 44.312522888183594, -3.7939834594726562, 43.6031494140625, 18.98322105407715, 26.067028045654297, 74.61746978759766, 35.86232376098633, 2.3440170288085938, 65.68313598632812, 28.508413314819336, 66.95478820800781, 28.36595916748047, 8.056272506713867, 34.8602294921875, 48.027400970458984, 54.449256896972656, 40.052276611328125, 19.40416717529297, 51.00994110107422, 39.287841796875, 65.89865112304688, 65.2201156616211, 22.31829833984375, 102.77564239501953, 56.10356903076172, 10.055185317993164, 69.29752349853516, 59.76116943359375, -2.597503662109375, 13.182540893554688, 28.708145141601562, 38.94239807128906, 44.90203857421875, 73.17764282226562, 13.549667358398438, 61.62617492675781, 43.701934814453125, 5.3107757568359375, 95.06727600097656, 40.38550567626953, 36.435707092285156, 12.402824401855469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000470.npy"}
|
|
{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 31.3731689453125, "std": 24.57742691040039, "min": -16.857240676879883, "p10": 1.8243820190429703, "median": 32.074214935302734, "p90": 61.725701904296876, "max": 99.42613220214844, "pos_frac": 0.90625, "sample": [55.52122497558594, -2.7848281860351562, 47.4036865234375, 50.24638366699219, 6.84259033203125, 40.8767204284668, 14.368362426757812, 72.24681091308594, 56.514007568359375, 3.490814208984375, 39.40876770019531, 6.28546142578125, 51.612144470214844, 6.8938446044921875, 89.12779235839844, 5.923858642578125, 11.241214752197266, -1.4592132568359375, 48.368186950683594, 67.089111328125, 99.42613220214844, 4.6474456787109375, 47.457984924316406, 50.81333541870117, 54.26960754394531, -16.857240676879883, -6.5189971923828125, 23.480443954467773, 37.67347717285156, 39.925514221191406, 10.289346694946289, 33.444488525390625, 14.107261657714844, 50.14411163330078, 27.53886604309082, 62.12461471557617, 61.87627410888672, 3.459442138671875, 34.3740234375, 61.11493682861328, 38.46162033081055, 15.200660705566406, 50.95641326904297, 22.279064178466797, 30.99957275390625, 33.37623596191406, 34.8389892578125, 1.1236419677734375, 23.176406860351562, 14.831787109375, -1.5977783203125, 26.907264709472656, -3.769674301147461, 53.537841796875, 18.218902587890625, 18.072509765625, 4.173982620239258, 17.472095489501953, 26.937301635742188, 66.75338745117188, 24.797149658203125, 61.374366760253906, 34.604156494140625, 33.14885711669922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000471.npy"}
|
|
{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 40.2446174621582, "std": 33.216896057128906, "min": -9.222412109375, "p10": 5.102720642089844, "median": 33.46429443359375, "p90": 81.19024047851563, "max": 138.20571899414062, "pos_frac": 0.9375, "sample": [30.46471405029297, 5.710945129394531, 25.675155639648438, -5.607330322265625, 5.225799560546875, 45.859825134277344, 6.229877471923828, -9.222412109375, 41.72010803222656, -4.850933074951172, 29.734004974365234, 63.18733596801758, 36.445068359375, 10.026914596557617, 33.492095947265625, 121.41729736328125, 36.739845275878906, 12.335494995117188, 23.005828857421875, 33.436492919921875, 116.81573486328125, 18.65457534790039, 33.992340087890625, 37.175498962402344, 3.605794906616211, -3.7319259643554688, 30.691940307617188, 16.653121948242188, 78.44798278808594, 82.36549377441406, 14.976470947265625, 42.72749328613281, 48.61663818359375, 75.39169311523438, 134.95468139648438, 5.0499725341796875, 18.122440338134766, 97.58377075195312, 58.31597900390625, 34.26036834716797, 52.11842346191406, 67.18684387207031, 26.294063568115234, 64.11555480957031, 28.597396850585938, 138.20571899414062, 105.955078125, 37.8204345703125, 57.92366027832031, 33.387855529785156, 48.54193115234375, 17.04857635498047, 54.9114990234375, 8.936958312988281, 49.97947692871094, 54.13496398925781, 58.52496337890625, 25.01032257080078, 2.3975601196289062, 33.23497009277344, 25.485477447509766, 26.480152130126953, 23.945846557617188, 49.721534729003906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000472.npy"}
|
|
{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 28.15515899658203, "std": 22.63782501220703, "min": -19.25145721435547, "p10": -1.4527803421020495, "median": 29.29606819152832, "p90": 54.7385139465332, "max": 80.83113098144531, "pos_frac": 0.84375, "sample": [13.837730407714844, 22.076414108276367, 39.388946533203125, 59.755767822265625, 17.797527313232422, 32.128814697265625, 19.894668579101562, 16.41362762451172, 41.139801025390625, -0.36353111267089844, 37.8656005859375, 50.335609436035156, -1.9196014404296875, 10.354705810546875, 44.63457489013672, 17.531465530395508, 40.8004150390625, -0.1353759765625, 45.07389450073242, 40.55422592163086, -7.79088020324707, 66.8367919921875, 54.81071472167969, 30.333833694458008, 39.80708694458008, 21.126333236694336, 25.335113525390625, 73.71730041503906, 26.55409049987793, 35.55937957763672, 5.745258331298828, 43.10881805419922, 8.392341613769531, 18.041000366210938, -4.6004486083984375, 11.512775421142578, 80.83113098144531, 10.635108947753906, 42.01557922363281, 66.89398193359375, 22.237258911132812, 38.524253845214844, 43.614105224609375, 16.34130859375, 28.575851440429688, 33.274200439453125, -19.25145721435547, -7.545684814453125, -7.412506103515625, 54.570045471191406, 69.3982162475586, 49.3365478515625, 7.630247116088867, -3.1351242065429688, 30.016284942626953, 14.470836639404297, 41.43329620361328, -0.28057861328125, 38.71898651123047, 53.579063415527344, 4.7105560302734375, 46.419071197509766, 3.658151626586914, 47.016658782958984], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000473.npy"}
|
|
{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 30.685691833496094, "std": 25.61254119873047, "min": -12.856475830078125, "p10": -3.3217891693115233, "median": 28.060593605041504, "p90": 65.52381286621093, "max": 92.20222473144531, "pos_frac": 0.8125, "sample": [22.133270263671875, 27.782304763793945, 12.345890045166016, 68.2520523071289, 91.73446655273438, 30.97478485107422, 86.99765014648438, -4.227571487426758, 50.89583969116211, 21.05217742919922, 23.03689193725586, 35.87664794921875, 5.724756240844727, 16.70209503173828, 10.882125854492188, 43.66058349609375, 17.3743953704834, -0.044116973876953125, -12.856475830078125, 31.001249313354492, 47.22059631347656, 64.70956420898438, -7.128196716308594, -7.316181182861328, -3.044178009033203, 55.697059631347656, 16.57623291015625, 21.305694580078125, 65.66337585449219, 18.874664306640625, -2.817607879638672, 20.116252899169922, 26.103755950927734, 38.85845184326172, 49.66072082519531, 47.52735900878906, 41.87123107910156, 28.973419189453125, 30.344594955444336, -3.440765380859375, 28.338882446289062, 46.495147705078125, 61.519500732421875, 52.43670654296875, -1.199371337890625, 23.818649291992188, 45.33930206298828, 92.20222473144531, 9.303852081298828, -6.030738830566406, 53.96150207519531, 24.798431396484375, 26.64836311340332, 15.541702270507812, 38.4017333984375, 28.401084899902344, 70.33526611328125, 23.98101043701172, 65.19816589355469, -4.09477424621582, 38.79808807373047, 32.45353698730469, -1.502349853515625, 69.6832275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000474.npy"}
|
|
{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 34.429412841796875, "std": 30.452960968017578, "min": -12.9737548828125, "p10": 3.837327575683595, "median": 29.204971313476562, "p90": 66.20656356811526, "max": 148.33450317382812, "pos_frac": 0.921875, "sample": [2.21826171875, 11.07187271118164, 46.086021423339844, 6.824989318847656, 33.959869384765625, 21.22238540649414, 53.79197692871094, 5.251182556152344, 148.33450317382812, 53.17350769042969, 27.87751007080078, 27.843521118164062, 57.4976806640625, 9.313064575195312, 55.32916259765625, 18.318227767944336, 122.64566802978516, 32.55418395996094, 43.016876220703125, -6.152317047119141, 26.039249420166016, -12.9737548828125, 43.12904357910156, 19.961442947387695, 16.773616790771484, 43.94123840332031, 68.94225311279297, 26.60291862487793, 34.182987213134766, 30.218170166015625, 12.739313125610352, 24.860809326171875, 3.3069076538085938, 58.907833099365234, 10.284622192382812, 75.34577178955078, 44.238975524902344, 35.194053649902344, 40.48322296142578, 59.82328796386719, 18.485857009887695, 5.449098587036133, 28.1917724609375, -1.6378021240234375, 34.40129852294922, 31.989418029785156, -5.78875732421875, 8.550552368164062, 73.10980987548828, 47.15973663330078, 5.074974060058594, 21.099830627441406, 52.75444793701172, 8.0267333984375, 43.485618591308594, 16.115768432617188, 27.26226806640625, 109.1036376953125, 41.395103454589844, 96.9839859008789, 7.412322998046875, 48.06248474121094, -2.4547481536865234, 57.06885528564453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000475.npy"}
|
|
{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 30.353801727294922, "std": 26.748226165771484, "min": -13.575729370117188, "p10": -2.375325775146484, "median": 28.592604637145996, "p90": 66.67832565307617, "max": 105.05415344238281, "pos_frac": 0.859375, "sample": [53.720611572265625, -1.1030635833740234, 0.45328330993652344, -10.538688659667969, 57.90668869018555, 34.26365661621094, 80.05931091308594, 51.002052307128906, 30.331157684326172, -9.773059844970703, 56.62287139892578, 40.448585510253906, 78.25645446777344, 50.49095153808594, 54.94822311401367, 31.098495483398438, 39.321434020996094, 17.052635192871094, 65.55218505859375, 60.623878479003906, 52.86180877685547, 35.68818664550781, -13.575729370117188, 24.183372497558594, 12.915559768676758, 29.262344360351562, -2.5973892211914062, 17.673110961914062, 3.3055496215820312, 11.479331970214844, 7.961658477783203, -1.857177734375, 4.602508544921875, 9.230537414550781, 47.87788391113281, 25.170780181884766, 21.986618041992188, 10.559457778930664, -4.952301025390625, 35.466094970703125, 5.897457122802734, 20.632797241210938, 45.682640075683594, -9.412017822265625, 3.3431148529052734, 12.500761032104492, 85.83251190185547, 67.57496643066406, 105.05415344238281, 58.930633544921875, 25.268234252929688, 17.71567153930664, 15.65875244140625, 31.25841522216797, -4.78790283203125, 27.92286491394043, 39.635765075683594, 67.16095733642578, 10.472957611083984, 32.43927001953125, 71.43084716796875, 41.79999542236328, 18.75921630859375, 43.89139175415039], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000476.npy"}
|
|
{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 31.857627868652344, "std": 27.865074157714844, "min": -48.500282287597656, "p10": 0.07321891784668039, "median": 32.06553649902344, "p90": 63.10661010742188, "max": 104.81078338623047, "pos_frac": 0.890625, "sample": [21.59693145751953, 48.91667938232422, 18.41392707824707, 23.37976837158203, 78.74626922607422, 61.86102294921875, 63.235260009765625, 31.964805603027344, 28.24633026123047, 54.01570129394531, 33.3388671875, 20.45631980895996, 39.94328308105469, 62.806427001953125, 60.41334533691406, 15.896738052368164, 28.329071044921875, 21.590755462646484, 32.16626739501953, 43.13508605957031, 47.0128173828125, 34.09391784667969, 16.260025024414062, 5.133241653442383, -2.472412109375, 72.38121795654297, -7.870391845703125, 28.723724365234375, 17.1588077545166, 24.314035415649414, -5.700023651123047, 41.80082702636719, -48.500282287597656, -25.208526611328125, 47.563941955566406, 38.89404296875, 12.826889038085938, 38.991180419921875, 55.16204833984375, 22.420063018798828, 17.410140991210938, 0.7684364318847656, 57.493408203125, 66.37828826904297, 23.240325927734375, 34.80757141113281, 44.55652618408203, 51.839927673339844, 36.808258056640625, 9.918590545654297, 104.81078338623047, -23.90998077392578, 1.6171798706054688, 44.15220260620117, 2.0909500122070312, 68.6146240234375, 42.2445068359375, -0.2247314453125, 48.498199462890625, 17.221820831298828, 7.328971862792969, 28.457801818847656, 50.368202209472656, 102.958251953125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000477.npy"}
|
|
{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 34.497291564941406, "std": 29.46787452697754, "min": -18.171356201171875, "p10": 2.762488174438477, "median": 26.741891860961914, "p90": 72.82818222045898, "max": 119.847900390625, "pos_frac": 0.9375, "sample": [13.915102005004883, 32.162776947021484, -0.03363800048828125, 7.135768890380859, 22.27263641357422, 22.45574951171875, 41.54200744628906, 16.950908660888672, 7.689201354980469, 14.260848999023438, -18.171356201171875, 52.177467346191406, 5.110403060913086, 66.12785339355469, 3.14404296875, 53.85450744628906, 18.782424926757812, 100.52369689941406, 26.515472412109375, 33.51543426513672, 2.5989646911621094, -4.01806640625, 12.190433502197266, 47.023780822753906, 40.19054412841797, 102.31565856933594, 31.69927978515625, 12.725568771362305, 8.409294128417969, 40.23906707763672, 119.847900390625, 59.988712310791016, 22.26244354248047, 68.49169921875, 47.5831298828125, 60.513648986816406, 85.72451782226562, 12.177757263183594, 28.87451171875, 14.963920593261719, 57.95960998535156, 58.38202667236328, 2.1740875244140625, -4.81158447265625, 64.92035675048828, 21.682388305664062, 43.489776611328125, 71.980712890625, 13.457000732421875, 64.07417297363281, 73.1913833618164, 34.02154541015625, 26.968311309814453, 41.77751159667969, 0.1587066650390625, 23.286643981933594, 77.58773803710938, 6.83929443359375, 88.56672668457031, 13.290885925292969, 4.666160583496094, 23.29410171508789, 46.03325653076172, 21.101844787597656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000478.npy"}
|
|
{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 30.368532180786133, "std": 22.986536026000977, "min": -12.185871124267578, "p10": 0.2989475250244156, "median": 31.502525329589844, "p90": 62.25023574829102, "max": 76.89051818847656, "pos_frac": 0.890625, "sample": [6.301445007324219, 38.14228439331055, 44.02178192138672, 8.15899658203125, -5.3148956298828125, 48.90545654296875, 23.625167846679688, 6.037101745605469, 50.346710205078125, -4.122434616088867, 52.474021911621094, 31.70458984375, 18.899124145507812, 39.98658752441406, 64.71135711669922, 30.134109497070312, 59.047569274902344, 10.236862182617188, -8.866920471191406, -0.3267707824707031, -12.185871124267578, 76.12942504882812, 8.564802169799805, 14.956771850585938, 52.027740478515625, 41.91765594482422, 20.80048370361328, 62.30511474609375, 50.48793411254883, -7.480457305908203, 62.12218475341797, 48.54670715332031, 52.085853576660156, 43.97564697265625, 3.4957199096679688, 30.616058349609375, 23.61236572265625, 9.25360107421875, 3.291290283203125, 43.44943618774414, 39.423274993896484, 33.48612976074219, 67.1737060546875, 24.185684204101562, 5.480072021484375, -6.2972412109375, 33.781585693359375, 41.271881103515625, 41.34637451171875, 64.83368682861328, 31.300460815429688, 25.146766662597656, 1.7589569091796875, 16.187698364257812, 22.976242065429688, 34.138275146484375, 39.18658447265625, 51.103485107421875, 31.91118049621582, 76.89051818847656, 26.657424926757812, 67.24099731445312, 28.6669921875, 3.6606979370117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000479.npy"}
|
|
{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 36.273193359375, "std": 33.05205535888672, "min": -35.53260803222656, "p10": -1.4272773742675757, "median": 31.12323760986328, "p90": 79.5316505432129, "max": 127.28507995605469, "pos_frac": 0.890625, "sample": [17.473148345947266, 47.32502746582031, -2.3918609619140625, 32.10009765625, 8.977508544921875, 71.97647857666016, 53.796024322509766, 34.330711364746094, 28.216514587402344, 31.009994506835938, 16.787158966064453, 118.94855499267578, -11.999626159667969, 24.2127685546875, 54.58977127075195, 22.041534423828125, -8.073184967041016, 18.17218017578125, 58.32537841796875, 96.51387023925781, 29.798538208007812, -10.660774230957031, 8.099319458007812, 46.1873779296875, 16.26721954345703, 46.6033935546875, 36.25898742675781, 5.743417739868164, 36.27593231201172, 9.685302734375, 56.708831787109375, 84.98973846435547, 76.59484100341797, 24.68228530883789, 5.3401336669921875, 16.530685424804688, 32.679603576660156, 0.8234176635742188, 41.383888244628906, 29.137041091918945, -4.7586212158203125, -35.53260803222656, 59.79834747314453, 25.332130432128906, 127.28507995605469, 30.310728073120117, 68.55332946777344, 63.533355712890625, 17.048492431640625, 13.379579544067383, 1.8767547607421875, -6.8616180419921875, 36.89421081542969, 31.452842712402344, 82.25228881835938, 39.90597152709961, 20.996353149414062, 59.7309684753418, 31.236480712890625, 71.04869079589844, 80.790283203125, 15.093267440795898, 64.38583374023438, 122.2708511352539], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000480.npy"}
|
|
{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 34.74099349975586, "std": 27.138439178466797, "min": -22.65024185180664, "p10": 0.9321571350097688, "median": 33.81380271911621, "p90": 69.24416122436524, "max": 95.69537353515625, "pos_frac": 0.890625, "sample": [32.37665557861328, -0.4157257080078125, 82.04096984863281, 34.67426681518555, 41.19792938232422, 13.483482360839844, 11.660354614257812, 53.31161880493164, 44.24068069458008, 61.02571105957031, 23.464881896972656, 14.696525573730469, 6.8672943115234375, -3.016185760498047, 32.953338623046875, 25.236404418945312, 23.51017951965332, 49.64567565917969, 61.653228759765625, 22.4803466796875, 83.31402587890625, 80.76445007324219, 66.21343231201172, 52.1540641784668, 35.439109802246094, 31.61548614501953, 40.127593994140625, 23.946739196777344, 12.397087097167969, -7.319938659667969, -19.421688079833984, 59.52998352050781, 20.054903030395508, -4.118583679199219, 67.9802017211914, 9.7392578125, 12.056900024414062, 59.728153228759766, 38.83361053466797, 44.7501220703125, 31.10035514831543, 34.70330810546875, 38.93387222290039, 94.40773010253906, 4.077217102050781, 4.223228454589844, 40.477874755859375, 52.71662139892578, 95.69537353515625, 44.58734130859375, 24.959564208984375, 15.862991333007812, -8.720909118652344, 66.30741119384766, 20.1981201171875, 41.89874267578125, -22.65024185180664, 31.83863067626953, 37.5841064453125, 69.78585815429688, 83.5938720703125, 23.85938835144043, 47.90293884277344, 11.207660675048828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000481.npy"}
|
|
{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 30.073631286621094, "std": 26.09050178527832, "min": -11.088953018188477, "p10": 2.3537872314453128, "median": 24.41065216064453, "p90": 65.52386322021485, "max": 107.620361328125, "pos_frac": 0.90625, "sample": [15.36175537109375, 9.494415283203125, 36.02703857421875, 25.5123291015625, 66.5223388671875, 72.18896484375, 27.40503692626953, 85.9694595336914, 9.639999389648438, 2.3145065307617188, 15.391044616699219, 16.86286163330078, 17.143478393554688, 3.419219970703125, 76.9925765991211, 62.817840576171875, 64.98641967773438, 26.97258758544922, 37.220703125, 12.543956756591797, 5.071987152099609, -6.066558837890625, 27.994598388671875, 59.83155059814453, 7.886631011962891, 20.051483154296875, -2.2414321899414062, 51.92340850830078, 75.33283996582031, 10.365455627441406, -0.013336181640625, 65.75419616699219, 47.264434814453125, 31.60839080810547, 11.497909545898438, 13.314592361450195, 2.4454421997070312, 32.65171813964844, 62.84405517578125, 25.257904052734375, 46.32110595703125, 7.632030487060547, -11.088953018188477, -0.8344802856445312, 30.88249397277832, 17.146514892578125, -2.541259765625, 63.94115447998047, 107.620361328125, 11.592376708984375, 9.456428527832031, 23.563400268554688, 6.9438323974609375, 31.487457275390625, 38.213470458984375, 46.55923080444336, 55.572208404541016, 32.46534729003906, 21.05914306640625, 8.377250671386719, 18.542083740234375, 13.146453857421875, 60.21405029296875, 60.878883361816406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000482.npy"}
|
|
{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 35.49970245361328, "std": 27.894441604614258, "min": -18.48330307006836, "p10": 2.74029655456543, "median": 33.16633605957031, "p90": 61.394462585449226, "max": 136.12245178222656, "pos_frac": 0.921875, "sample": [-11.142059326171875, 62.01177978515625, 24.246932983398438, 56.18476486206055, 17.733749389648438, 36.38838195800781, 30.2337646484375, 29.69472885131836, 58.058685302734375, 45.318870544433594, 70.85966491699219, 77.672119140625, 56.320396423339844, -5.863739013671875, 33.44416809082031, 32.33169174194336, 85.12586975097656, 32.88850402832031, 3.8947677612304688, 30.24933624267578, 10.403959274291992, 11.766824722290039, 53.072967529296875, 83.01048278808594, 2.6886367797851562, 56.34711456298828, 39.45137023925781, 18.61197280883789, 16.682342529296875, 59.78599548339844, 2.8608360290527344, 40.88732147216797, 1.6775131225585938, 107.01302337646484, 35.34974670410156, 27.845199584960938, 52.32750701904297, 47.35649871826172, 29.093505859375, 58.48710632324219, 46.03984069824219, 37.807373046875, 39.92412567138672, 9.457145690917969, 32.65888595581055, 22.14128875732422, 12.032432556152344, 21.977615356445312, 59.95405578613281, -2.7109222412109375, 38.264122009277344, -14.878021240234375, 11.379173278808594, 136.12245178222656, 23.886550903320312, 52.76123046875, 51.80225372314453, 38.05401611328125, 41.8568115234375, 7.317405700683594, 30.210556030273438, 48.16046905517578, 25.873130798339844, -18.48330307006836], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000483.npy"}
|
|
{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 35.594505310058594, "std": 30.20879554748535, "min": -5.215751647949219, "p10": 2.1252201080322273, "median": 29.749731063842773, "p90": 75.06423950195314, "max": 130.16082763671875, "pos_frac": 0.9375, "sample": [39.96672821044922, 17.46599578857422, 6.155555725097656, 65.48622131347656, 1.8198928833007812, 22.769271850585938, 12.899307250976562, 65.25506591796875, 76.40951538085938, 37.096927642822266, 7.063449859619141, 31.511322021484375, 34.89894104003906, 60.98982238769531, 48.0887336730957, 41.17420196533203, 84.61223602294922, 2.8376502990722656, 25.27886199951172, 15.60845947265625, 39.6492919921875, 28.964794158935547, 51.33393096923828, 67.56640625, 120.874267578125, -4.45793342590332, 27.287628173828125, 92.49838256835938, -5.215751647949219, 10.390228271484375, 0.9523048400878906, 15.700000762939453, 70.47732543945312, 64.7676010131836, 21.28339385986328, 12.069091796875, 25.627197265625, 37.19752502441406, 11.828788757324219, 38.7044677734375, 7.006324768066406, 18.875343322753906, 16.929290771484375, 52.560184478759766, 71.92526245117188, -3.418731689453125, 40.41455078125, 39.68536376953125, 6.570528030395508, 0.32254791259765625, 3.833494186401367, 15.874565124511719, 84.97664642333984, 47.61309051513672, 14.969459533691406, 30.53466796875, 130.16082763671875, 54.406009674072266, 84.22400665283203, -1.8223514556884766, 51.396324157714844, 27.37561798095703, 9.614336013793945, 49.13404083251953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000484.npy"}
|
|
{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 40.27811813354492, "std": 32.17557907104492, "min": -18.691848754882812, "p10": 0.78987216949463, "median": 37.30965232849121, "p90": 86.26026077270508, "max": 107.37789916992188, "pos_frac": 0.921875, "sample": [68.5755615234375, 77.60836791992188, 86.75193786621094, 5.9626617431640625, 85.1130142211914, 1.9172744750976562, 51.20189666748047, -3.89910888671875, 24.969009399414062, 59.648529052734375, 30.814247131347656, 10.719734191894531, 72.42765045166016, 35.866722106933594, 19.00025177001953, 107.37789916992188, 4.03594970703125, 0.027914047241210938, 92.67849731445312, 65.1632080078125, 50.51478576660156, 53.265960693359375, 12.463336944580078, 44.67119598388672, 42.417266845703125, 100.66156005859375, -6.569725036621094, 0.3066997528076172, 34.04298400878906, -2.9770278930664062, 15.072433471679688, 40.09159851074219, 28.39708709716797, 7.18145751953125, 60.384246826171875, 13.760711669921875, 69.65707397460938, 64.09194946289062, 20.986560821533203, 56.374786376953125, 39.54679870605469, 41.0137939453125, 53.312713623046875, 29.980209350585938, 6.217311859130859, 54.13334655761719, 4.529695510864258, 57.08405303955078, 27.24190902709961, -16.898834228515625, 102.70472717285156, 81.01396942138672, 26.32004165649414, 36.93724060058594, 34.18464660644531, 21.721939086914062, 101.84783935546875, 5.8841400146484375, -18.691848754882812, 24.6068115234375, 37.682064056396484, 83.95423889160156, 87.43780517578125, 55.27873229980469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000485.npy"}
|
|
{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 38.945411682128906, "std": 30.784053802490234, "min": -17.51525115966797, "p10": 0.9036026000976562, "median": 36.53977584838867, "p90": 79.24257583618164, "max": 112.56798553466797, "pos_frac": 0.90625, "sample": [77.10315704345703, 31.13446807861328, 38.37718200683594, 21.01834487915039, 65.48289489746094, 60.61505126953125, 66.5750503540039, 0.893646240234375, 41.64439392089844, 12.672294616699219, 45.44782257080078, 64.31318664550781, 70.21809387207031, 20.316871643066406, 13.545646667480469, 65.44351196289062, 29.432464599609375, 10.982109069824219, 10.602195739746094, 80.15946960449219, 98.58142852783203, 22.771507263183594, 13.595748901367188, -2.2135391235351562, -6.915214538574219, 35.04602813720703, 58.242698669433594, 58.483619689941406, 32.397705078125, 65.55164337158203, 58.582855224609375, 13.646247863769531, -13.016433715820312, 0.9268341064453125, 49.08944320678711, 57.88349151611328, 18.38837432861328, 89.6889419555664, 58.26676940917969, 67.38392639160156, 10.992523193359375, 23.089279174804688, 38.03352355957031, -2.1341476440429688, 58.881980895996094, 84.96273803710938, 80.21368408203125, 46.31382751464844, 57.85514450073242, 99.49569702148438, 3.3491897583007812, 31.145475387573242, 14.66465950012207, 30.056724548339844, 29.1683349609375, 22.49773406982422, 38.41089630126953, 68.71009063720703, -10.655097961425781, 3.0277252197265625, 62.850440979003906, -17.51525115966797, 4.163230895996094, 112.56798553466797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000486.npy"}
|
|
{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 37.19938278198242, "std": 26.885433197021484, "min": -6.788246154785156, "p10": 5.548789978027345, "median": 34.09355545043945, "p90": 73.12206039428713, "max": 97.73538208007812, "pos_frac": 0.9375, "sample": [53.73793029785156, 75.64063262939453, 20.22425079345703, 35.08793640136719, 84.48111724853516, 44.98322296142578, 48.331939697265625, 17.869384765625, 45.59877014160156, 6.641349792480469, 10.358551025390625, 33.09917449951172, 67.23004150390625, 0.363311767578125, 16.59589385986328, 59.85218811035156, 49.72138977050781, 13.788623809814453, 41.8963508605957, 55.4552001953125, 54.736785888671875, 62.118621826171875, 16.220489501953125, 86.6077880859375, 22.983402252197266, 50.868019104003906, 27.73199462890625, 20.468238830566406, 52.180763244628906, 12.233621597290039, 49.18511199951172, -6.788246154785156, 17.819808959960938, 21.234390258789062, 56.79441833496094, 9.734455108642578, 60.82017517089844, -6.740081787109375, 56.85634994506836, 56.91913986206055, 35.791358947753906, 66.68594360351562, 6.741708755493164, 5.117612838745117, 6.1442108154296875, 78.09293365478516, -2.0332260131835938, 12.395614624023438, 85.34366607666016, 18.571815490722656, 49.911041259765625, 31.36785888671875, 63.47726058959961, 54.15704345703125, 19.68384552001953, 5.293609619140625, 28.671417236328125, 97.73538208007812, 67.24539184570312, 28.405349731445312, -4.66046142578125, 88.9245834350586, 7.7838592529296875, 26.970169067382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000487.npy"}
|
|
{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 32.08653259277344, "std": 21.3510799407959, "min": -4.063652038574219, "p10": 10.163520812988281, "median": 30.793113708496094, "p90": 61.48150100708008, "max": 77.51725769042969, "pos_frac": 0.953125, "sample": [-1.6644363403320312, 35.659481048583984, 52.85075378417969, 17.843217849731445, 77.51725769042969, -4.063652038574219, 47.548583984375, 10.440073013305664, 16.766433715820312, 14.956008911132812, 33.47571563720703, 31.173904418945312, 38.917091369628906, 20.217483520507812, 70.53649139404297, 61.521484375, 33.243560791015625, 13.036359786987305, 10.218475341796875, 41.830841064453125, 14.973867416381836, 10.139968872070312, 54.22547912597656, 23.78946876525879, 30.412322998046875, 41.2877197265625, 24.558147430419922, 35.349246978759766, 52.560546875, 6.521360397338867, 32.7537841796875, 50.49077606201172, 11.783134460449219, 17.279212951660156, 73.40199279785156, 35.4920539855957, 61.388206481933594, 75.39036560058594, 44.700653076171875, 19.013099670410156, 51.01256561279297, 76.01560974121094, 17.064769744873047, 26.44683074951172, 25.099702835083008, -2.4857025146484375, 15.215919494628906, 38.946929931640625, 11.668403625488281, 13.836383819580078, 20.938493728637695, 35.03572082519531, 61.1550407409668, 20.562231063842773, 26.52552032470703, 33.35286331176758, 2.4777488708496094, 48.166221618652344, 11.091348648071289, 3.426746368408203, 15.997135162353516, 51.50469970703125, 36.988555908203125, 75.95783996582031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000488.npy"}
|
|
{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 31.778711318969727, "std": 27.663244247436523, "min": -8.21101188659668, "p10": -1.713649940490722, "median": 27.261679649353027, "p90": 64.39057121276856, "max": 118.59872436523438, "pos_frac": 0.875, "sample": [30.983566284179688, 8.825843811035156, 34.556968688964844, 25.339996337890625, -8.21101188659668, 43.452728271484375, -2.4091243743896484, 59.70030975341797, 16.266372680664062, 21.915081024169922, 19.960697174072266, 6.370464324951172, 31.212692260742188, 19.940696716308594, 48.9031982421875, 19.88067626953125, 16.689327239990234, 70.82490539550781, 17.417198181152344, 62.85075759887695, 51.27525329589844, -1.0301189422607422, 16.404327392578125, 46.14777374267578, 21.048492431640625, -6.015228271484375, 20.64153289794922, -5.484767913818359, 19.491188049316406, 1.5882644653320312, 18.22028350830078, 29.635726928710938, 3.517261505126953, 26.334672927856445, 106.03733825683594, 40.68896484375, 36.740081787109375, 6.462982177734375, 41.062828063964844, 62.64940643310547, 54.64634704589844, 2.226747512817383, 88.49114990234375, 1.3670825958251953, 59.55168914794922, 65.05049133300781, 20.069908142089844, 28.295631408691406, 24.862102508544922, -6.3489990234375, 76.99053192138672, 62.347137451171875, 36.53340148925781, 73.6697769165039, 28.349939346313477, 28.18868637084961, 118.59872436523438, 38.424591064453125, 59.27879333496094, -6.990348815917969, 9.511199951171875, 44.72801971435547, 48.1138916015625, -2.006591796875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000489.npy"}
|
|
{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 31.28500747680664, "std": 29.745311737060547, "min": -30.953392028808594, "p10": -2.26859951019287, "median": 25.51198673248291, "p90": 72.8109733581543, "max": 102.01776123046875, "pos_frac": 0.859375, "sample": [21.032989501953125, 3.6808547973632812, 38.25250244140625, -1.277475357055664, 29.612579345703125, 4.943794250488281, 39.92274475097656, -14.24831771850586, 62.63996124267578, -2.804502487182617, -9.321037292480469, -7.271881103515625, 22.315582275390625, 15.739664077758789, 41.812644958496094, 10.94921875, -30.953392028808594, 64.84564971923828, 11.795440673828125, 1.5639266967773438, -0.6351261138916016, 26.35711669921875, 11.816112518310547, 8.776704788208008, 8.576669692993164, 73.21892547607422, 32.48124694824219, 25.895437240600586, -2.6933670043945312, 57.10161590576172, 25.128536224365234, 24.222015380859375, 37.03215026855469, 71.18305969238281, 39.053123474121094, 17.355857849121094, 71.85908508300781, 43.79176330566406, 102.01776123046875, 7.727970123291016, 56.54051208496094, 86.44902801513672, 68.57919311523438, 74.01544189453125, 37.76507568359375, 92.84451293945312, -6.6092071533203125, 44.59699249267578, 8.776702880859375, 36.39588928222656, 65.06236267089844, 14.181333541870117, 52.38250732421875, 8.63037109375, 55.758819580078125, 51.89055633544922, 8.176483154296875, 5.309783935546875, 19.500259399414062, 87.60301971435547, 18.182186126708984, 84.63540649414062, 16.956283569335938, 31.11933708190918], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000490.npy"}
|
|
{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 28.079666137695312, "std": 27.855226516723633, "min": -31.170894622802734, "p10": -0.37073593139648364, "median": 24.21238899230957, "p90": 62.17767715454103, "max": 117.2748794555664, "pos_frac": 0.890625, "sample": [57.86668395996094, -7.190908432006836, 90.73269653320312, 34.87300109863281, 53.34748840332031, 18.18707847595215, 11.911575317382812, 1.7764892578125, 24.641468048095703, 21.212295532226562, 16.990703582763672, 25.7476806640625, 13.918067932128906, 26.214454650878906, 43.200714111328125, 25.665332794189453, 7.534210205078125, 18.959091186523438, 11.829010009765625, 38.39915466308594, 31.442031860351562, -2.4869213104248047, 58.291168212890625, 3.5089492797851562, 15.127164840698242, 18.169355392456055, 58.799842834472656, -5.547262191772461, 41.98051452636719, 9.570261001586914, 32.65257263183594, 68.7325439453125, -17.630348205566406, 6.48988151550293, 37.03141784667969, 52.70957946777344, 0.37268829345703125, 89.9627914428711, 51.288604736328125, 33.14686584472656, 29.5191650390625, 4.624509811401367, 117.2748794555664, 7.653772354125977, 3.221588134765625, 41.12373733520508, -0.6893463134765625, 48.702232360839844, 2.8724517822265625, 54.27943801879883, 12.82632827758789, 2.3216552734375, 63.62532043457031, 25.06293487548828, 19.05852699279785, 28.296035766601562, 82.34024810791016, 19.692337036132812, 23.783309936523438, 22.277023315429688, 79.1263656616211, -6.94439697265625, 28.793380737304688, -31.170894622802734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000491.npy"}
|
|
{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 26.099708557128906, "std": 30.116130828857422, "min": -31.390335083007812, "p10": -14.381443977355955, "median": 26.454147338867188, "p90": 70.02894439697266, "max": 90.36957550048828, "pos_frac": 0.796875, "sample": [11.327232360839844, 30.789710998535156, 22.696083068847656, -9.977178573608398, 33.514404296875, -17.684160232543945, 27.033905029296875, 32.58496856689453, 21.196949005126953, 26.299457550048828, 15.365669250488281, 20.54408073425293, -25.78583526611328, 41.373199462890625, -31.390335083007812, 90.36957550048828, -5.482536315917969, 54.25146484375, 12.196737289428711, 89.05291748046875, -10.562471389770508, 25.791685104370117, 39.521820068359375, 20.94390869140625, 74.60194396972656, 62.011783599853516, 57.433692932128906, 28.316375732421875, 30.761791229248047, 21.644996643066406, -5.999900817871094, 0.7513484954833984, 6.051425933837891, -23.80632781982422, -12.503450393676758, 67.6597900390625, 71.04429626464844, 8.430316925048828, -25.366836547851562, 44.721473693847656, 64.38369750976562, 7.18743896484375, 46.025306701660156, 13.457212448120117, -4.298763275146484, 73.2214126586914, 38.760929107666016, 21.034015655517578, 54.712432861328125, 26.608837127685547, 28.368377685546875, 40.30467987060547, 31.07353973388672, -15.186298370361328, 29.31005859375, 41.149417877197266, 71.34100341796875, 13.367130279541016, 3.3482398986816406, 81.76187133789062, -27.895078659057617, 51.47879409790039, 14.11231803894043, 47.03082275390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000492.npy"}
|
|
{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 32.915733337402344, "std": 26.52034568786621, "min": -27.147605895996094, "p10": -2.360428619384765, "median": 30.162936210632324, "p90": 63.110100555419926, "max": 96.30465698242188, "pos_frac": 0.875, "sample": [44.2149772644043, 17.56307601928711, 96.30465698242188, 42.61035919189453, 4.7509918212890625, 45.197906494140625, 44.33859634399414, 32.0438232421875, -8.24456787109375, -27.147605895996094, 87.96514129638672, 20.582778930664062, 48.856353759765625, 17.763290405273438, 29.055038452148438, 47.160247802734375, 13.373664855957031, 34.1185302734375, 35.51937484741211, 9.5059814453125, 28.027423858642578, 58.955501556396484, 88.4402084350586, 72.53531646728516, 60.420413970947266, 35.761573791503906, 46.86212158203125, -1.7423477172851562, 12.298318862915039, 27.88751983642578, -9.913299560546875, 30.76230812072754, 62.47583770751953, 24.094085693359375, 75.97547149658203, 27.490327835083008, 24.007869720458984, 63.381927490234375, 26.556549072265625, -3.300628662109375, 19.102088928222656, 35.450592041015625, 29.56356430053711, 20.653099060058594, 58.98609161376953, 92.16537475585938, 11.20954704284668, 38.487083435058594, 49.75880432128906, 52.70049285888672, 47.129676818847656, 11.594144821166992, -2.6253204345703125, 46.96192169189453, -6.927604675292969, -10.169181823730469, 6.209384918212891, 23.93191909790039, 59.1053466796875, 25.195812225341797, 3.0030288696289062, 34.34832763671875, 20.101333618164062, 54.16249084472656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000493.npy"}
|
|
{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 35.46459197998047, "std": 30.0831356048584, "min": -25.38532257080078, "p10": -1.355794525146484, "median": 30.03800678253174, "p90": 74.9020278930664, "max": 99.82002258300781, "pos_frac": 0.859375, "sample": [-0.9773292541503906, 46.87248992919922, 21.327774047851562, 73.92727661132812, 68.33971405029297, 59.16770935058594, -17.36682891845703, 99.82002258300781, 26.61577796936035, 7.576333999633789, 50.076148986816406, 28.752010345458984, -11.480171203613281, 56.08973693847656, 28.871610641479492, 75.31977844238281, 60.06745147705078, -1.6765213012695312, 53.5321044921875, 86.25969696044922, 4.352985382080078, -25.38532257080078, 24.627548217773438, 40.094757080078125, 44.22666931152344, 21.674007415771484, 62.24793243408203, 83.37274932861328, 33.5682487487793, 22.556758880615234, 24.751075744628906, -5.394439697265625, 36.86428451538086, 30.752975463867188, 65.3305892944336, 48.405982971191406, 68.027587890625, 17.84522247314453, 3.6258392333984375, 95.21023559570312, 72.72154998779297, -0.9648323059082031, -1.5179939270019531, 13.097129821777344, 10.704559326171875, 29.32303810119629, 58.727386474609375, 37.83528137207031, 53.934486389160156, 21.963726043701172, 12.495645523071289, -12.105926513671875, 95.806640625, 40.171695709228516, 54.378936767578125, 19.658740997314453, 86.58111572265625, 18.62970733642578, 38.649620056152344, 54.46171569824219, 3.4959335327148438, 25.459064483642578, 24.114105224609375, 4.2419891357421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000494.npy"}
|
|
{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 32.37921142578125, "std": 27.76520347595215, "min": -36.897186279296875, "p10": 0.625724029541016, "median": 31.174007415771484, "p90": 63.72550811767579, "max": 107.20719909667969, "pos_frac": 0.90625, "sample": [55.21271514892578, 2.629617691040039, 77.06730651855469, 52.84123229980469, 1.0661239624023438, 18.84208106994629, -8.83721923828125, 6.549224853515625, 45.96910095214844, 12.242904663085938, 19.83422088623047, 38.19731140136719, 19.16785430908203, -9.578550338745117, 51.05413818359375, 10.16252326965332, -8.27165412902832, 67.45836639404297, 33.687713623046875, 46.025848388671875, 19.92080307006836, -36.897186279296875, 38.19865036010742, 9.786418914794922, 31.487266540527344, 94.31887817382812, 58.44585418701172, -8.095539093017578, 49.624534606933594, 30.860748291015625, 34.296600341796875, 62.678466796875, 13.781726837158203, 9.780227661132812, 38.23194122314453, 107.20719909667969, 53.331939697265625, 8.24612808227539, 49.00157165527344, 58.759552001953125, 52.048988342285156, 46.458213806152344, 15.496315002441406, 22.757659912109375, 20.72562026977539, 38.964447021484375, -9.958419799804688, 18.78436279296875, 27.801942825317383, 0.436981201171875, 12.063621520996094, 10.761848449707031, 26.67877197265625, 52.708282470703125, 62.518768310546875, 33.703277587890625, 16.597793579101562, 37.972721099853516, 29.45740509033203, 58.01512145996094, 64.17424011230469, 96.98150634765625, 66.98406982421875, 15.849266052246094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000495.npy"}
|
|
{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 28.696502685546875, "std": 25.869693756103516, "min": -29.316619873046875, "p10": -0.26025428771972525, "median": 24.60771369934082, "p90": 63.10016708374024, "max": 99.02214813232422, "pos_frac": 0.890625, "sample": [23.393035888671875, 2.3224639892578125, 56.670166015625, 63.58998107910156, 16.09039306640625, 84.02322387695312, 24.8497314453125, 35.809051513671875, 40.86289596557617, 14.97332763671875, 41.99119186401367, 1.0203170776367188, 22.63665771484375, 21.416366577148438, 71.87700653076172, 10.683917999267578, 13.099594116210938, 28.057044982910156, 23.982959747314453, -8.714065551757812, 30.732391357421875, 60.18950653076172, 75.95254516601562, 36.28882598876953, 99.02214813232422, 47.58735275268555, 9.002655029296875, 56.55763244628906, 25.71527862548828, -1.241607666015625, 19.759933471679688, 14.612405776977539, 6.697906494140625, 24.36569595336914, 13.444602966308594, -29.316619873046875, 20.553131103515625, 13.614471435546875, 9.337287902832031, 17.431312561035156, -0.8090705871582031, 18.20201301574707, 32.25717544555664, -16.39122772216797, 31.078109741210938, 63.680320739746094, 33.77641296386719, 46.194236755371094, 90.55638122558594, 61.95726776123047, 4.0688323974609375, 4.17132568359375, 38.392311096191406, 40.764556884765625, 28.092506408691406, 43.084564208984375, 37.97726058959961, 47.13264465332031, 7.582736968994141, -5.5378265380859375, 58.93152618408203, 30.864437103271484, 6.783912658691406, -5.176307678222656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000496.npy"}
|
|
{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 28.408655166625977, "std": 28.08185577392578, "min": -20.001129150390625, "p10": -2.7525682449340816, "median": 20.523695945739746, "p90": 70.17958908081057, "max": 88.09044647216797, "pos_frac": 0.859375, "sample": [4.004306793212891, 64.66168975830078, 15.509674072265625, 60.044708251953125, 3.327392578125, 14.804443359375, 83.71427917480469, 0.7346649169921875, 9.403512954711914, 2.332155227661133, 17.016769409179688, 58.123931884765625, 83.77119445800781, 30.64307403564453, 9.543731689453125, -20.001129150390625, 88.09044647216797, 56.58506774902344, 36.43804168701172, 21.396453857421875, 17.947799682617188, -2.9765090942382812, 75.60181427001953, 5.345069885253906, -4.3372344970703125, 19.366056442260742, 11.049484252929688, -9.784271240234375, 31.966445922851562, -1.8145637512207031, 59.12571716308594, 30.23113250732422, 14.554996490478516, 59.15070343017578, 22.503080368041992, 80.03202819824219, 10.626922607421875, 72.54440307617188, 23.638614654541016, 4.874076843261719, 45.427635192871094, 42.460411071777344, -4.995687484741211, 78.76995849609375, 13.08676528930664, 19.650938034057617, 50.794891357421875, 5.6862945556640625, 50.32377624511719, 42.12799835205078, 54.63985061645508, 9.173194885253906, 31.495075225830078, 43.22930908203125, 6.420316696166992, 19.177276611328125, 4.7382659912109375, 62.53934860229492, -2.230039596557617, 24.10149383544922, 53.037689208984375, -19.76739501953125, 38.09980010986328, -9.62343978881836], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000497.npy"}
|
|
{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 30.984760284423828, "std": 27.986759185791016, "min": -20.92620849609375, "p10": -2.9834363937377923, "median": 25.839611053466797, "p90": 68.45401458740234, "max": 109.10934448242188, "pos_frac": 0.875, "sample": [15.457342147827148, 38.75053024291992, 9.097450256347656, 4.641357421875, 59.561485290527344, 20.625991821289062, 68.24392700195312, 68.54405212402344, 40.64905548095703, 25.83716583251953, 25.159984588623047, 21.65222930908203, 29.666763305664062, 56.64368438720703, 10.576309204101562, 72.68724060058594, 37.054229736328125, 54.430320739746094, 19.7093505859375, 85.95889282226562, 18.400650024414062, 21.923690795898438, -14.520599365234375, -4.452384948730469, -20.92620849609375, 34.12049102783203, 5.54876708984375, 8.326377868652344, 27.645713806152344, 23.652969360351562, 21.534807205200195, 25.681852340698242, -16.1361083984375, 101.69866180419922, 6.825920104980469, -3.242156982421875, 4.053779602050781, 16.340652465820312, 38.82582092285156, 69.46064758300781, 109.10934448242188, 22.309547424316406, 38.43145751953125, 40.09852981567383, 84.95881652832031, 61.853111267089844, 2.1374969482421875, 43.95263671875, 43.992366790771484, 45.92017364501953, 38.977569580078125, 10.695598602294922, 27.1761474609375, 38.82014465332031, -11.546066284179688, 10.173309326171875, 51.21703338623047, 21.74225616455078, 64.59004974365234, 25.842056274414062, -2.3797550201416016, 51.940765380859375, 39.929534912109375, -6.628213882446289], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000498.npy"}
|
|
{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 31.55392837524414, "std": 30.621047973632812, "min": -47.15885925292969, "p10": -1.491476058959961, "median": 31.013694763183594, "p90": 67.76062240600587, "max": 111.42703247070312, "pos_frac": 0.859375, "sample": [67.64329528808594, 42.10066604614258, 30.991065979003906, -10.106546401977539, 15.432365417480469, 39.42071533203125, 50.326019287109375, 35.44099426269531, -0.8167037963867188, -11.39813232421875, 6.650976181030273, 67.81090545654297, 3.9622058868408203, 81.96533203125, 18.070350646972656, 60.574440002441406, 42.18043518066406, 33.353004455566406, 111.42703247070312, 41.97468566894531, 40.16954040527344, 24.134532928466797, 59.57000732421875, 2.5413436889648438, 9.488067626953125, 33.74554443359375, 49.29901885986328, 59.71903991699219, -1.4327564239501953, 31.492759704589844, 18.22763442993164, 0.2719879150390625, 64.48238372802734, 23.03594970703125, 22.058273315429688, 36.52838897705078, 28.838211059570312, 63.36293029785156, 31.03632354736328, -47.15885925292969, 11.305997848510742, 27.973854064941406, 16.018829345703125, 81.3860092163086, 51.282859802246094, 27.644134521484375, 69.5053939819336, 13.047859191894531, 1.3800582885742188, 43.858150482177734, 26.948951721191406, 77.98101806640625, 27.71825408935547, 65.32522583007812, 5.611019134521484, 17.31987762451172, 47.2833251953125, 39.01500701904297, -33.630035400390625, -17.66172218322754, -4.916316986083984, 39.48097229003906, -1.516641616821289, 110.67579650878906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000499.npy"}
|
|
{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 33.82952117919922, "std": 29.376726150512695, "min": -19.078529357910156, "p10": 2.8274232864379893, "median": 26.33731746673584, "p90": 66.26198883056642, "max": 137.1235809326172, "pos_frac": 0.90625, "sample": [3.9797420501708984, 17.152660369873047, 14.567955017089844, 137.1235809326172, 30.962913513183594, 61.61417770385742, 68.01683044433594, 71.74461364746094, 20.531116485595703, 32.89311981201172, 62.1673583984375, 23.032379150390625, 24.621337890625, 33.05024337768555, -0.6707820892333984, 6.118408203125, 24.375457763671875, -0.5758056640625, 24.133304595947266, 30.456451416015625, 88.75797271728516, -10.039382934570312, 30.705673217773438, 46.69971466064453, 19.141525268554688, 121.65202331542969, 9.687431335449219, 62.07020568847656, 44.81761932373047, -19.078529357910156, 18.949493408203125, 88.69021606445312, 29.407028198242188, 16.706480026245117, 23.504966735839844, 47.68405532836914, 30.383121490478516, 55.95105743408203, 35.69287872314453, 9.5081787109375, 28.05329704284668, 49.010501861572266, 12.304328918457031, 59.133453369140625, 17.096092224121094, -1.9416255950927734, 44.35022735595703, 24.005760192871094, 2.3335723876953125, 46.49052429199219, 101.77967834472656, 13.779075622558594, 39.43646240234375, 14.510360717773438, 48.18316650390625, 38.86479949951172, 19.66387939453125, 17.66021728515625, 32.65834045410156, 24.580339431762695, -3.5523815155029297, 58.54961395263672, 20.12880516052246, 21.823974609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000500.npy"}
|
|
{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 37.292991638183594, "std": 29.057201385498047, "min": -12.509441375732422, "p10": 5.631687164306641, "median": 34.424964904785156, "p90": 73.73782653808594, "max": 128.81259155273438, "pos_frac": 0.9375, "sample": [19.583633422851562, 38.318668365478516, 91.64916229248047, 5.6201629638671875, 41.5474739074707, 48.25801086425781, 9.819507598876953, 75.6893539428711, 43.12532043457031, 2.3806533813476562, 47.800071716308594, 9.921710968017578, 58.69822692871094, 16.669090270996094, 128.81259155273438, 23.16376495361328, 76.69480895996094, 18.21782684326172, 31.85251808166504, 74.1844253540039, 60.836097717285156, 47.64635467529297, 6.3021240234375, 19.6623477935791, 56.967742919921875, 12.837387084960938, 27.820297241210938, 43.0584716796875, 66.62910461425781, 18.346145629882812, -1.9744338989257812, 29.30279541015625, 4.102943420410156, 20.617794036865234, 34.499847412109375, 123.46360778808594, 23.510498046875, 44.1503791809082, 37.252418518066406, 50.91841125488281, 57.68308639526367, 47.44666290283203, 26.8873348236084, 9.706588745117188, 64.82002258300781, 70.51504516601562, -12.509441375732422, 34.35008239746094, 31.165714263916016, -3.0836410522460938, 27.36212158203125, 18.436447143554688, 9.628849029541016, -11.867149353027344, 5.658576965332031, 10.589637756347656, 44.463233947753906, 38.345726013183594, 73.891845703125, 48.17694091796875, 54.163360595703125, 67.63566589355469, 73.37844848632812, 11.949151992797852], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000501.npy"}
|
|
{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 33.75102996826172, "std": 21.826812744140625, "min": -11.148231506347656, "p10": 3.6179193496704127, "median": 37.08907127380371, "p90": 59.08996772766114, "max": 102.76698303222656, "pos_frac": 0.9375, "sample": [47.29993438720703, -3.6458473205566406, 2.082387924194336, 59.803958892822266, 26.163436889648438, 39.12519073486328, -8.536262512207031, 10.7310791015625, 68.33824157714844, 37.62247085571289, 40.07798767089844, 60.02720642089844, 49.445777893066406, 33.327903747558594, 56.22489929199219, -5.563621520996094, 27.961647033691406, 2.5184688568115234, 14.77801513671875, 20.00201416015625, 42.73473358154297, 29.892475128173828, 35.00914001464844, 42.263832092285156, 7.22613525390625, 45.78353500366211, 27.749534606933594, 7.882083892822266, 66.44778442382812, 39.09822463989258, 43.669189453125, 102.76698303222656, 6.1833038330078125, 16.496089935302734, 51.255409240722656, 47.42534637451172, 42.29156494140625, 42.874168395996094, -11.148231506347656, 19.39750862121582, 39.662193298339844, 49.024574279785156, 37.988311767578125, 49.606201171875, 51.421653747558594, 57.423988342285156, 61.024818420410156, 19.1512393951416, 37.83381652832031, 15.02423095703125, 51.6343994140625, 25.401641845703125, 36.55567169189453, 12.302703857421875, 73.91924285888672, 12.161067962646484, 37.762908935546875, 20.306243896484375, 54.756256103515625, 32.20429229736328, 36.33422088623047, 35.11738204956055, 0.987335205078125, 29.377853393554688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000502.npy"}
|
|
{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 34.12990188598633, "std": 23.525371551513672, "min": -16.68000030517578, "p10": 0.8879741668701182, "median": 35.727752685546875, "p90": 60.31672592163086, "max": 77.80168151855469, "pos_frac": 0.90625, "sample": [9.478763580322266, -7.293657302856445, 59.579994201660156, 22.21295166015625, 21.664840698242188, -10.64013671875, 69.41532897949219, 35.7420654296875, 1.8334617614746094, 19.57114028930664, 12.450305938720703, 48.12989044189453, 41.053733825683594, 30.385772705078125, 74.78306579589844, 57.2659912109375, 38.161930084228516, 15.357574462890625, 38.66217803955078, 59.63584899902344, 58.05607604980469, 44.85984420776367, 29.438278198242188, 24.735153198242188, 61.528297424316406, 51.89849853515625, -16.68000030517578, 59.534629821777344, 12.880090713500977, 35.48957824707031, 56.45111846923828, -2.70501708984375, 8.91461181640625, 28.986968994140625, -3.829425811767578, 35.71343994140625, 57.3338623046875, 42.597259521484375, 49.66441345214844, 52.42658233642578, 0.48276519775390625, 34.09876251220703, 51.02409362792969, 48.05316925048828, -8.523887634277344, 33.669921875, 21.118196487426758, 52.85521697998047, 62.88164520263672, 59.92132568359375, 34.608856201171875, 22.100730895996094, 60.486183166503906, 77.80168151855469, 71.68942260742188, 18.57867431640625, 16.759288787841797, 37.98594665527344, 17.893653869628906, 2.5509796142578125, 2.5852737426757812, 45.89768981933594, 54.057708740234375, 40.991119384765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000503.npy"}
|
|
{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 39.98591613769531, "std": 26.27921485900879, "min": -12.201004028320312, "p10": 6.993256759643557, "median": 35.44336128234863, "p90": 75.76621398925782, "max": 111.91203308105469, "pos_frac": 0.953125, "sample": [8.82489013671875, 23.63416290283203, 25.33990478515625, 26.203292846679688, 18.726577758789062, 64.3951416015625, 59.949562072753906, 2.962646484375, 96.4140625, 82.9815444946289, 111.91203308105469, 19.341224670410156, 41.76519775390625, 39.602394104003906, 2.677581787109375, 34.27581024169922, 83.46216583251953, 30.420196533203125, 28.691627502441406, 32.17399597167969, 71.70365142822266, 40.69127655029297, 44.39668273925781, 60.47285461425781, 43.07776641845703, -1.1373405456542969, 26.254592895507812, 37.41693878173828, 36.29045486450195, 33.60087585449219, -12.201004028320312, 82.6810302734375, 35.6701545715332, 59.50862503051758, 21.98892593383789, 29.213638305664062, 58.27667236328125, 48.98883056640625, -0.4891242980957031, 46.57587432861328, 30.36260986328125, 99.626708984375, 19.03050994873047, 6.208271026611328, 75.30641174316406, 23.310028076171875, 24.931550979614258, 12.416610717773438, 33.003570556640625, 40.08650207519531, 42.37748718261719, 35.21656799316406, 75.96327209472656, 67.49308776855469, 10.92940902709961, 34.71929931640625, 31.326431274414062, 53.098487854003906, 65.47596740722656, 17.479217529296875, 46.244590759277344, 74.99351501464844, 37.60223388671875, 5.161102294921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000504.npy"}
|
|
{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 28.277128219604492, "std": 29.574512481689453, "min": -35.14299011230469, "p10": -1.4594322204589836, "median": 20.640634536743164, "p90": 71.42545623779299, "max": 103.718994140625, "pos_frac": 0.84375, "sample": [11.36932373046875, 74.81355285644531, 11.556060791015625, 63.4846305847168, -0.2545318603515625, 103.718994140625, 0.97772216796875, 20.51763153076172, 67.26556396484375, 18.505386352539062, 60.627342224121094, 46.30567932128906, 86.07691192626953, -1.7623138427734375, 14.360527038574219, 46.2489013671875, 18.43444061279297, 26.025909423828125, 38.968902587890625, 1.7695350646972656, 21.120935440063477, 73.20826721191406, 38.461700439453125, 11.83819580078125, -21.593902587890625, 45.89923858642578, -12.87490463256836, 42.03693389892578, 58.07682800292969, 76.96446990966797, 8.551807403564453, 35.95436096191406, 37.6668701171875, -17.924270629882812, 20.76363754272461, 58.88081359863281, 38.05738067626953, 15.625646591186523, 6.088033676147461, 0.7597064971923828, 24.90888786315918, -35.14299011230469, 9.132549285888672, 42.314231872558594, 11.963531494140625, 16.27521514892578, 60.47735595703125, 58.70671844482422, 27.813739776611328, 94.1077880859375, 25.367816925048828, 1.7012100219726562, -0.4291725158691406, 79.94389343261719, 58.12210464477539, -0.7527084350585938, 11.887447357177734, 7.445770263671875, 19.60906982421875, 17.650848388671875, 27.400314331054688, -6.172832489013672, 12.8038330078125, -1.9702777862548828], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000505.npy"}
|
|
{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 33.98353576660156, "std": 29.518352508544922, "min": -16.371824264526367, "p10": -0.021889114379882113, "median": 30.14310073852539, "p90": 77.02998962402344, "max": 131.52120971679688, "pos_frac": 0.890625, "sample": [-12.327789306640625, 10.306350708007812, 32.401451110839844, 21.172805786132812, 24.12906265258789, 44.06566619873047, 0.6538887023925781, 43.684478759765625, 81.68519592285156, 17.94295883178711, 50.346126556396484, 25.603469848632812, 18.096343994140625, 37.09062957763672, 9.0521240234375, 36.7139892578125, 46.703853607177734, 22.823394775390625, 31.203346252441406, 36.195350646972656, 15.996002197265625, 89.85189056396484, 94.57049560546875, 30.34522247314453, 131.52120971679688, -1.6149024963378906, 21.037986755371094, 58.211456298828125, 36.6644287109375, -12.093513488769531, 42.96104049682617, 24.456117630004883, 65.51148223876953, 21.01932144165039, 30.467208862304688, -10.538627624511719, 71.94772338867188, 67.95333862304688, 18.452972412109375, 91.4810791015625, 53.18119812011719, 51.20403289794922, 29.921859741210938, 12.127029418945312, -16.371824264526367, 16.07740592956543, 35.7037353515625, 31.083656311035156, 74.45025634765625, 58.252410888671875, 21.273237228393555, 4.227821350097656, 29.94097900390625, 41.09191131591797, 78.13558959960938, 85.55561828613281, 3.677154541015625, -1.9102516174316406, 26.722457885742188, 4.874725341796875, -0.3115081787109375, 29.472183227539062, 36.467811584472656, 4.3540496826171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000506.npy"}
|
|
{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 31.98436737060547, "std": 22.67708396911621, "min": -2.321441650390625, "p10": 1.0112215042114259, "median": 32.28876304626465, "p90": 59.337502288818364, "max": 112.11190795898438, "pos_frac": 0.9375, "sample": [9.680784225463867, 1.0511703491210938, 1.3469619750976562, 28.948890686035156, 63.78120422363281, 44.70896911621094, 22.751373291015625, 45.34777069091797, -1.0498504638671875, 56.161773681640625, 36.7554931640625, 44.953834533691406, 19.691864013671875, 45.13294219970703, 40.16998291015625, 28.907806396484375, 6.340568542480469, 30.312042236328125, 33.931427001953125, 112.11190795898438, 6.79949951171875, 31.570480346679688, 0.6943302154541016, 43.204307556152344, 43.047584533691406, 51.518402099609375, 31.474822998046875, 36.26971435546875, 3.883289337158203, 44.06683349609375, 33.00704574584961, 23.426612854003906, -2.300994873046875, -0.9668979644775391, 40.448822021484375, 74.88475036621094, 18.76498794555664, 64.57550811767578, 40.791526794433594, 19.736618041992188, 63.96760559082031, 5.6130218505859375, 50.17244338989258, 27.11248016357422, 59.963775634765625, 24.52239227294922, 19.36821746826172, 38.53321838378906, 40.93724822998047, 51.208404541015625, 33.9902229309082, 22.174848556518555, 0.9941005706787109, 51.423622131347656, 26.050033569335938, 57.876197814941406, 47.023193359375, 65.4707260131836, -2.321441650390625, 0.05756950378417969, 6.5009918212890625, 11.261062622070312, 19.286340713500977, 49.87921142578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000507.npy"}
|
|
{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 32.10009002685547, "std": 28.83991241455078, "min": -20.232826232910156, "p10": -0.9938760757446289, "median": 26.939682960510254, "p90": 64.65440368652344, "max": 108.2960433959961, "pos_frac": 0.84375, "sample": [-2.1569366455078125, 23.40594482421875, -20.232826232910156, 17.698280334472656, 26.311309814453125, -0.37255859375, 0.9139556884765625, 98.24627685546875, 49.921142578125, 49.88561248779297, 50.085784912109375, 21.9498291015625, 40.765586853027344, 49.2547721862793, 67.50978088378906, 45.466583251953125, 60.288333892822266, 41.35725402832031, 108.2960433959961, 47.060089111328125, 6.862216949462891, 38.12901306152344, 24.34084701538086, 42.57593536376953, 61.10919189453125, 57.14118957519531, 7.4362640380859375, 3.5545654296875, 8.645009994506836, 20.069480895996094, -0.3996429443359375, 28.080604553222656, 3.8154144287109375, -1.0213069915771484, 100.97149658203125, -2.3558921813964844, 20.273963928222656, 29.76982307434082, 44.220428466796875, 15.1795654296875, 32.36329650878906, 59.87969207763672, 12.7662353515625, 9.822235107421875, 57.28532409667969, 27.21826171875, 26.661104202270508, 7.628501892089844, 55.07366180419922, 63.088600158691406, 81.17657470703125, 28.785545349121094, -1.0844383239746094, 55.84937286376953, 65.3254623413086, 23.69152069091797, -0.92987060546875, 12.495834350585938, 17.924667358398438, 84.21636199951172, -15.39068603515625, 13.922874450683594, -6.861623764038086, 59.44469451904297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000508.npy"}
|
|
{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 32.780052185058594, "std": 26.028345108032227, "min": -23.97222900390625, "p10": 2.45704517364502, "median": 30.99527645111084, "p90": 62.87817382812501, "max": 108.38638305664062, "pos_frac": 0.90625, "sample": [2.3123397827148438, 13.495193481445312, 21.4609375, 51.843353271484375, 20.454742431640625, 79.91510009765625, 21.41468048095703, 24.993560791015625, 61.25025939941406, 44.082794189453125, 55.58235168457031, 25.799278259277344, 29.389434814453125, 39.336219787597656, 12.508132934570312, 2.7946910858154297, 108.38638305664062, 4.793121337890625, -15.678466796875, 27.740440368652344, 86.517822265625, 34.31638717651367, 30.662887573242188, 21.175018310546875, 27.181961059570312, 12.672359466552734, 33.77744674682617, 29.50914764404297, 44.930931091308594, 55.131744384765625, 34.74280548095703, -9.199563980102539, 16.10946273803711, -21.58346176147461, -10.937362670898438, 70.10484313964844, 36.657684326171875, 54.326629638671875, 21.200828552246094, 9.377593994140625, 6.360073089599609, 49.378173828125, 7.2280426025390625, 40.36509704589844, 14.26641845703125, -23.97222900390625, 63.57585144042969, 57.476165771484375, 46.222564697265625, 64.74705505371094, -1.9930877685546875, 31.327665328979492, 51.86723709106445, 17.049671173095703, 48.74406433105469, 50.41056442260742, 39.111480712890625, 24.84423828125, 24.844776153564453, 38.16645812988281, 69.44758605957031, 56.099510192871094, 53.33493423461914, 60.47136688232422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000509.npy"}
|
|
{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 40.207786560058594, "std": 32.402069091796875, "min": -11.364791870117188, "p10": 0.1764179229736332, "median": 39.43504333496094, "p90": 78.62383346557617, "max": 115.10769653320312, "pos_frac": 0.90625, "sample": [-7.892448425292969, 23.740379333496094, 73.7044448852539, 53.80454635620117, 13.735023498535156, -7.480438232421875, 106.85943603515625, 46.05711364746094, 68.86537170410156, 70.75001525878906, 78.76160430908203, 15.033088684082031, 0.5559673309326172, 18.97899627685547, 71.46458435058594, 51.39504623413086, 48.67774963378906, -11.364791870117188, -6.3545379638671875, 71.69535064697266, 39.88658142089844, 27.74762725830078, 28.187026977539062, 5.37774658203125, 15.6787109375, 36.72275924682617, 61.99129104614258, 111.8809585571289, 67.6192626953125, 1.094696044921875, 115.10769653320312, 51.237857818603516, 13.629035949707031, 42.23268127441406, 12.80963134765625, 66.25727081298828, 41.51094055175781, 3.0330429077148438, 7.413166046142578, 25.136474609375, 19.743515014648438, 29.023269653320312, 78.3023681640625, 38.183074951171875, -5.122016906738281, -2.4141921997070312, 51.7579231262207, 42.82836151123047, 37.95249938964844, 76.28254699707031, 100.18867492675781, 7.507711410522461, 58.61114501953125, 84.78402709960938, 50.37554931640625, 57.95751190185547, 51.02366638183594, 55.77880859375, 0.013753890991210938, 14.862594604492188, 38.98350524902344, 101.43486022949219, 14.844329833984375, 14.853923797607422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000510.npy"}
|
|
{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 36.059539794921875, "std": 29.902868270874023, "min": -14.783889770507812, "p10": -1.6879892349243157, "median": 35.783119201660156, "p90": 77.58738403320312, "max": 100.53355407714844, "pos_frac": 0.875, "sample": [5.17132568359375, 78.69699096679688, -1.1024551391601562, 25.334915161132812, 15.183954238891602, 45.81428527832031, 38.98695373535156, 73.59318542480469, 18.211212158203125, 38.571998596191406, 64.6230239868164, 15.014785766601562, 100.53355407714844, 35.05889892578125, -1.9389324188232422, 77.73233032226562, 3.595123291015625, 69.28754425048828, 13.96826171875, 77.17489624023438, 41.1595458984375, 56.52355194091797, 9.448265075683594, 23.360862731933594, 37.311302185058594, 34.617225646972656, -14.783889770507812, 85.41698455810547, -13.543685913085938, 64.520263671875, 67.22671508789062, 34.30671691894531, -6.632173538208008, 37.77081298828125, 57.64765548706055, 53.14459991455078, 95.4093246459961, 48.044227600097656, 3.295074462890625, 26.534975051879883, 57.77561950683594, 36.6175537109375, 33.21291732788086, 21.958656311035156, -5.0861053466796875, 77.24917602539062, -6.182624816894531, 0.5903148651123047, -3.4741878509521484, 36.50733947753906, 0.32160186767578125, 22.953575134277344, 55.122596740722656, 39.51692199707031, 53.249847412109375, 8.343048095703125, 29.608917236328125, 6.527076721191406, 99.55242919921875, 25.489219665527344, 13.575836181640625, 39.11906433105469, 44.39778518676758, 86.57389831542969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000511.npy"}
|
|
{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 35.32998275756836, "std": 27.8614559173584, "min": -3.448699951171875, "p10": 3.6228862762451177, "median": 29.20815658569336, "p90": 76.27186431884768, "max": 117.85086059570312, "pos_frac": 0.9375, "sample": [81.75704956054688, 63.86602783203125, 28.29351806640625, 30.646286010742188, 4.0064544677734375, -2.4411773681640625, 43.865509033203125, 32.1419677734375, 5.789422988891602, 20.951467514038086, 18.777395248413086, 41.90167236328125, 1.3076648712158203, 52.15126419067383, -3.448699951171875, 21.01114273071289, 47.57987976074219, 48.98760223388672, 23.708602905273438, -2.465242385864258, 13.599800109863281, 78.78897857666016, -2.2796154022216797, 62.526695251464844, 117.85086059570312, 32.15283203125, 6.867343902587891, 27.228408813476562, 19.282455444335938, 20.97858428955078, 38.56000518798828, 53.487632751464844, 26.825971603393555, 54.631072998046875, 15.02414321899414, 51.4285888671875, 11.53483772277832, 20.536155700683594, 17.93750762939453, 58.65208435058594, 62.05464172363281, 31.26263427734375, 51.192806243896484, 86.79833984375, 4.092231750488281, 34.7535400390625, 46.085533142089844, 16.354997634887695, 4.8671417236328125, 70.39859771728516, 50.574554443359375, 6.1942291259765625, 80.46128845214844, 25.257083892822266, 30.12279510498047, 17.03343963623047, 54.266563415527344, 0.8277664184570312, 27.717437744140625, 25.3350830078125, 3.4584999084472656, 103.30435180664062, 95.08683013916016, 49.616363525390625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000512.npy"}
|
|
{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 30.298439025878906, "std": 26.354843139648438, "min": -46.20642852783203, "p10": -2.846168518066406, "median": 31.060245513916016, "p90": 63.91311721801758, "max": 91.74008178710938, "pos_frac": 0.859375, "sample": [35.78162384033203, 15.680229187011719, 82.43345642089844, -2.995840072631836, 33.517578125, 39.438865661621094, 9.534614562988281, 35.51613998413086, -7.7887420654296875, 66.12124633789062, 27.524768829345703, 9.83416748046875, 47.04955291748047, 31.51494789123535, 3.1854190826416016, 63.61723327636719, 30.60554313659668, 8.024127960205078, 43.72584533691406, 62.21363830566406, 31.587631225585938, 45.392059326171875, 69.48556518554688, 91.74008178710938, 41.644325256347656, 8.775508880615234, -5.333591461181641, 24.96595001220703, 49.21669006347656, 29.015247344970703, 43.635711669921875, 7.936140060424805, 49.74700164794922, 52.68701171875, 28.358070373535156, 9.9727783203125, 46.69037628173828, -5.29931640625, 33.02510070800781, 84.24874877929688, 14.247661590576172, 12.714698791503906, 52.19847869873047, -15.793716430664062, 76.82740783691406, -0.7635498046875, 6.478057861328125, 10.825933456420898, 38.072486877441406, 39.60902404785156, 26.43572235107422, -2.4969348907470703, 64.03992462158203, 21.021743774414062, -6.966926574707031, 28.695419311523438, 13.454851150512695, 26.024404525756836, 35.075035095214844, 46.88087844848633, -46.20642852783203, 56.182861328125, 21.90264129638672, 48.61500549316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000513.npy"}
|
|
{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 30.922880172729492, "std": 33.895538330078125, "min": -26.20556640625, "p10": -9.425224304199219, "median": 25.68124771118164, "p90": 74.94202957153321, "max": 136.52645874023438, "pos_frac": 0.828125, "sample": [-9.330215454101562, -3.275360107421875, 13.004661560058594, -17.358842849731445, 29.3748779296875, 26.138328552246094, 46.89716720581055, 1.270721435546875, 96.9827880859375, 64.9616928100586, 4.34825325012207, 43.65190887451172, -3.1593246459960938, 42.905731201171875, 42.42588806152344, 25.224166870117188, 26.588764190673828, 29.073522567749023, 43.54106521606445, 96.85445404052734, -13.523824691772461, 22.719642639160156, 47.934654235839844, 4.10807991027832, 13.564697265625, -10.8663330078125, 13.56365966796875, -9.695228576660156, 56.207557678222656, 30.297080993652344, 14.641935348510742, 66.1622085571289, 31.52740478515625, 40.442955017089844, 16.313522338867188, 28.64452362060547, 55.696685791015625, -4.0797576904296875, 111.58362579345703, -9.4659423828125, 24.99396514892578, 4.7955474853515625, -26.20556640625, 44.661956787109375, 17.729019165039062, -14.30990219116211, 15.620903015136719, 20.453628540039062, 3.51678466796875, 63.661346435546875, 21.22014617919922, 76.3613510131836, 9.497726440429688, 61.91758728027344, 136.52645874023438, 6.095653533935547, 61.10301208496094, 17.146591186523438, 93.39039611816406, 4.890289306640625, 92.04644775390625, 27.36211395263672, 39.0611572265625, 71.63027954101562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000514.npy"}
|
|
{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 33.66363525390625, "std": 27.529523849487305, "min": -12.849746704101562, "p10": 3.2429130554199226, "median": 30.475482940673828, "p90": 77.36729507446292, "max": 99.48031616210938, "pos_frac": 0.953125, "sample": [10.98550033569336, 31.06396484375, 64.0091552734375, 91.54501342773438, 15.329185485839844, 4.8730010986328125, 91.50833129882812, 50.39527893066406, 27.698135375976562, 15.443851470947266, 31.02618408203125, 21.069595336914062, 3.8018951416015625, 99.48031616210938, 60.52544403076172, 25.821693420410156, 41.761138916015625, 52.14618682861328, 54.79327392578125, 0.44628143310546875, 53.038299560546875, -2.9212207794189453, 32.62188720703125, 82.37057495117188, 30.048492431640625, 18.822853088378906, 80.01068115234375, 8.613758087158203, 29.159893035888672, 10.524389266967773, 58.27306365966797, 3.0033493041992188, 0.5748920440673828, 8.678268432617188, 48.82124328613281, 69.61231231689453, 8.824867248535156, 1.6013565063476562, 36.62367248535156, 10.141586303710938, 34.95069885253906, 43.02880096435547, 9.439502716064453, 15.883743286132812, 12.051658630371094, 18.633705139160156, 10.973060607910156, 97.52117156982422, 13.199665069580078, 6.249477386474609, 52.346778869628906, 71.19939422607422, -12.849746704101562, -1.2976818084716797, 45.68544006347656, 40.01023864746094, 41.133148193359375, 23.068695068359375, 84.7864990234375, 43.998512268066406, 23.403648376464844, 33.98646545410156, 33.99976348876953, 30.90247344970703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000515.npy"}
|
|
{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 35.44856262207031, "std": 28.498876571655273, "min": -7.7464141845703125, "p10": 1.3574897766113283, "median": 30.471290588378906, "p90": 75.1834426879883, "max": 115.98857116699219, "pos_frac": 0.90625, "sample": [30.054931640625, 55.277740478515625, 21.534698486328125, 13.00051498413086, 59.380287170410156, 41.32611846923828, 16.162261962890625, -7.7464141845703125, 30.887649536132812, 44.038909912109375, 58.413597106933594, 37.03758239746094, 48.563377380371094, 44.947967529296875, 19.517578125, 4.827598571777344, 55.03026580810547, 6.67633056640625, 37.007415771484375, 61.691009521484375, -6.2743072509765625, -0.6097412109375, 16.13538360595703, 41.32500076293945, 62.32012939453125, 19.8975830078125, 1.5634384155273438, 82.68301391601562, 115.98857116699219, 15.437568664550781, 12.60833740234375, 78.58221435546875, 112.71197509765625, 46.11822509765625, 80.4120101928711, 21.610687255859375, 11.320968627929688, -2.5039520263671875, 14.282752990722656, 32.94408416748047, 50.473289489746094, -3.8668994903564453, 19.754074096679688, 13.548419952392578, 22.528339385986328, 26.17816162109375, 28.749740600585938, 41.886260986328125, 20.523033142089844, 65.55906677246094, 40.99333953857422, 66.74073791503906, 12.19573974609375, 9.766708374023438, 78.15228271484375, 20.428741455078125, -4.9461212158203125, 10.525680541992188, 50.21710205078125, 68.25614929199219, 62.1162109375, 83.9107666015625, 49.56471633911133, 1.26922607421875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000516.npy"}
|
|
{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 34.930450439453125, "std": 25.577362060546875, "min": -20.419769287109375, "p10": 3.9048402786254885, "median": 32.42748260498047, "p90": 66.3022674560547, "max": 99.5942611694336, "pos_frac": 0.921875, "sample": [22.93033218383789, 51.38365173339844, 30.830917358398438, 4.214210510253906, 31.965423583984375, 22.014190673828125, 29.031265258789062, 59.81985092163086, 65.94293212890625, -5.851068496704102, 40.772377014160156, 70.13655853271484, 60.13519287109375, 51.498111724853516, 46.91609191894531, 29.945701599121094, 36.06493377685547, 16.499969482421875, 28.729665756225586, 61.672325134277344, 36.38947296142578, 46.76982116699219, 46.72845458984375, 22.842910766601562, 35.009422302246094, 5.075384140014648, 26.02886199951172, 33.414100646972656, 26.1326904296875, 96.33760070800781, 13.555511474609375, 39.763938903808594, -4.315225601196289, 66.45626831054688, 14.011764526367188, 52.47541809082031, 16.6756591796875, 64.60926055908203, -20.419769287109375, -3.3677749633789062, 46.14866638183594, 28.691743850708008, 3.7722530364990234, 24.464454650878906, 38.06562805175781, 56.27385711669922, 99.5942611694336, 37.100921630859375, 16.16762924194336, 86.70309448242188, 32.88954162597656, 64.00390625, 29.986038208007812, 72.80265808105469, 23.443572998046875, 74.05563354492188, 41.973602294921875, 12.4400634765625, 5.563970565795898, 27.562454223632812, -18.533294677734375, 46.21550369262695, 1.510040283203125, 15.826179504394531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000517.npy"}
|
|
{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 31.17576789855957, "std": 26.12419319152832, "min": -19.29910659790039, "p10": -1.217963409423825, "median": 27.532428741455078, "p90": 71.54044113159179, "max": 102.85406494140625, "pos_frac": 0.890625, "sample": [19.22848129272461, 72.41767883300781, 27.754989624023438, 50.71565628051758, 48.60365295410156, 21.649864196777344, -5.369773864746094, 34.35527801513672, 25.243633270263672, 15.387985229492188, 43.669803619384766, 25.177650451660156, 31.767196655273438, 16.95319366455078, 47.34297180175781, 40.04914093017578, 47.378135681152344, 18.34192657470703, 12.975616455078125, 43.78961181640625, 61.93394470214844, -2.5654525756835938, -19.29910659790039, 23.77185821533203, -5.135501861572266, 42.276123046875, 82.00175476074219, 27.30986785888672, 12.434764862060547, 97.48042297363281, 72.4111557006836, 1.926177978515625, 71.34459686279297, 39.73957443237305, 12.812454223632812, 18.674346923828125, 32.48808670043945, 22.774688720703125, 49.14495849609375, 61.19282150268555, 5.040290832519531, 18.277984619140625, 28.004737854003906, 18.127052307128906, 12.359079360961914, 28.673675537109375, 6.029838562011719, -11.346675872802734, 31.45902442932129, 35.04167175292969, 4.1465911865234375, 11.139644622802734, -8.160003662109375, 40.71464538574219, -7.767704010009766, 75.47308349609375, 23.87253761291504, 39.2694091796875, 11.215518951416016, 102.85406494140625, 43.96569061279297, 19.84161376953125, 55.242645263671875, 71.62437438964844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000518.npy"}
|
|
{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 32.517459869384766, "std": 30.833126068115234, "min": -20.79126739501953, "p10": 3.587921142578125, "median": 26.6220703125, "p90": 73.58830490112307, "max": 138.8477020263672, "pos_frac": 0.9375, "sample": [10.852188110351562, 39.040245056152344, 24.118816375732422, 8.246513366699219, 96.37799072265625, 6.1593017578125, 65.62688446044922, 43.49481201171875, 90.23954772949219, 36.64691162109375, 43.59516906738281, 67.95453643798828, -10.433612823486328, 76.74679565429688, 45.07014465332031, 1.1853828430175781, 124.0179443359375, 28.411117553710938, 28.683578491210938, 42.20709991455078, 58.79296875, 8.97601318359375, 23.230865478515625, 13.1365966796875, 7.519683837890625, 35.00830078125, 34.04833984375, 16.699615478515625, 15.652397155761719, -7.351984024047852, 6.3131103515625, 76.00277709960938, 44.161346435546875, 8.245346069335938, 33.876495361328125, 62.87714385986328, 24.746994018554688, 7.996635437011719, 10.996849060058594, 29.443870544433594, 21.989395141601562, 18.897964477539062, 52.90837097167969, 5.33013916015625, -2.7817344665527344, 57.37422180175781, 81.25662231445312, 9.286865234375, 22.23419189453125, 37.27259826660156, 42.738922119140625, 138.8477020263672, 3.8423728942871094, 60.87306213378906, 3.5546875, 45.451416015625, 26.958175659179688, 3.66546630859375, 12.986320495605469, 26.285964965820312, 1.6796340942382812, 18.021930694580078, 34.619842529296875, -20.79126739501953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000519.npy"}
|
|
{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 32.879398345947266, "std": 25.705875396728516, "min": -15.548225402832031, "p10": 4.343635368347168, "median": 30.304351806640625, "p90": 72.46442718505861, "max": 95.61735534667969, "pos_frac": 0.921875, "sample": [40.872169494628906, 30.113449096679688, 92.1671142578125, 10.233823776245117, 44.50389099121094, 41.669803619384766, 42.167625427246094, 17.161165237426758, 22.390830993652344, 30.495254516601562, -0.3066864013671875, -2.3740234375, 73.86990356445312, 4.3262786865234375, 4.384134292602539, 15.282188415527344, 62.615455627441406, 95.61735534667969, 32.77423095703125, 12.081661224365234, 59.649200439453125, 88.01513671875, -15.548225402832031, 22.921493530273438, 14.216140747070312, 63.39011001586914, 36.7947883605957, 80.75695037841797, 33.536041259765625, 37.45073318481445, 19.840652465820312, 22.94597625732422, 50.16166687011719, 32.748931884765625, 14.095687866210938, 39.50577163696289, 78.78091430664062, 38.478607177734375, 28.76030731201172, 40.75563049316406, 26.444171905517578, 23.095090866088867, 0.6799278259277344, 19.951171875, 29.730567932128906, 68.36168670654297, 9.489044189453125, 32.85303497314453, 81.45670318603516, -6.0802154541015625, 42.53175354003906, 58.09654235839844, 15.470321655273438, 15.186481475830078, -5.5746612548828125, 69.18498229980469, 11.048797607421875, 26.67194366455078, 31.710556030273438, 33.23712921142578, 33.36289978027344, 9.217437744140625, 16.065826416015625, 4.78814697265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000520.npy"}
|
|
{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 32.02197265625, "std": 26.55714988708496, "min": -12.641510009765625, "p10": 0.9970405578613294, "median": 28.202238082885742, "p90": 71.74784317016602, "max": 99.91277313232422, "pos_frac": 0.90625, "sample": [24.372940063476562, 80.92129516601562, 35.132568359375, 10.727622985839844, -5.159379959106445, -0.6948699951171875, 26.865493774414062, 11.823944091796875, 12.065155029296875, 29.538982391357422, 16.58224868774414, -12.641510009765625, -9.477203369140625, 26.62078857421875, 55.547706604003906, 99.91277313232422, 9.5037841796875, 9.156814575195312, -0.131805419921875, 87.83233642578125, 11.484859466552734, 12.830482482910156, 20.388872146606445, 11.818216323852539, 73.33222961425781, 36.067779541015625, 46.09556579589844, 9.105583190917969, 29.539566040039062, 18.095352172851562, 44.39784240722656, 53.673553466796875, 16.690773010253906, 15.141181945800781, 2.2708053588867188, 39.847747802734375, 13.825775146484375, 34.66398620605469, 65.18222045898438, 25.43415069580078, 19.027536392211914, 53.51046371459961, 40.39649200439453, 41.15815353393555, -7.108795166015625, 34.04644775390625, 5.011798858642578, 39.78416442871094, 8.20123291015625, 11.991531372070312, 55.850982666015625, 52.57789611816406, 86.61924743652344, 32.73103332519531, 0.451141357421875, 75.76480102539062, 44.35246276855469, 66.2802505493164, 16.931407928466797, 71.03157043457031, 39.224945068359375, 69.02088928222656, 32.109519958496094, 72.05481719970703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000521.npy"}
|
|
{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 27.99399185180664, "std": 26.318756103515625, "min": -13.91171646118164, "p10": 1.539336776733399, "median": 23.19931983947754, "p90": 60.11579170227051, "max": 101.2135009765625, "pos_frac": 0.90625, "sample": [13.117973327636719, 40.43242645263672, 34.295677185058594, 3.5716323852539062, 40.826080322265625, 23.02560043334961, 27.338348388671875, 25.67922019958496, 16.52548599243164, -13.91171646118164, 10.839069366455078, 16.343833923339844, 82.49423217773438, 2.5159912109375, 59.20134353637695, 12.103652954101562, 36.93929672241211, 1.3332481384277344, 36.30839538574219, 38.869483947753906, 47.319053649902344, 59.167396545410156, 43.043174743652344, 22.39299774169922, -4.983287811279297, 99.59895324707031, 31.575584411621094, 33.38890838623047, 88.41265869140625, 9.83740234375, 42.03572082519531, 29.179855346679688, 23.37303924560547, 4.079032897949219, 35.83502197265625, 3.5331573486328125, 12.342973709106445, 14.937799453735352, 44.3242073059082, 5.583526611328125, 3.4417800903320312, -0.42130279541015625, 25.051956176757812, -1.4100227355957031, 32.6461296081543, 34.22614288330078, 17.78870391845703, 19.785919189453125, 18.854320526123047, 101.2135009765625, 14.967971801757812, 26.963626861572266, 84.68406677246094, -3.6422557830810547, 7.324848175048828, 29.588768005371094, -8.398529052734375, 2.0202102661132812, 60.50769805908203, 12.708131790161133, 90.62350463867188, 21.147537231445312, 39.53253936767578, 9.583755493164062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000522.npy"}
|
|
{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 31.942184448242188, "std": 28.90403938293457, "min": -26.954513549804688, "p10": -1.9853578567504861, "median": 26.256103515625, "p90": 72.3850700378418, "max": 88.40420532226562, "pos_frac": 0.890625, "sample": [17.872406005859375, -3.8478775024414062, 20.337631225585938, 48.53794860839844, 70.2633056640625, 4.8070068359375, 46.0556640625, 88.40420532226562, 16.420303344726562, 40.1746826171875, -16.248716354370117, 55.15094757080078, 2.1013526916503906, 43.49430847167969, 22.12319564819336, 10.31110954284668, 55.96307373046875, 78.84228515625, 34.457088470458984, 11.381660461425781, 1.6184158325195312, 51.222991943359375, 45.6180305480957, 18.281625747680664, 39.39762496948242, 52.24729919433594, 2.716320037841797, 22.02154541015625, 10.464744567871094, 86.22187042236328, 22.426626205444336, 27.637605667114258, 5.62449836730957, -26.954513549804688, 61.47828674316406, 72.55555725097656, -3.4094676971435547, 0.10129737854003906, 66.97959899902344, 15.77659797668457, 35.22248840332031, 13.48497200012207, 21.579864501953125, 0.36438751220703125, 26.927474975585938, -2.879638671875, 67.876220703125, 33.05366516113281, 71.98726654052734, 10.617380142211914, 73.72132873535156, 30.07659149169922, 7.253746032714844, -4.24969482421875, 64.1263427734375, 24.82787322998047, 63.22430419921875, -9.754486083984375, 55.846946716308594, 88.17526245117188, 88.128662109375, 31.740039825439453, 8.735786437988281, 25.584732055664062], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000523.npy"}
|
|
{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 29.516183853149414, "std": 24.76129913330078, "min": -30.669967651367188, "p10": 2.2377637863159183, "median": 27.177433967590332, "p90": 63.0700668334961, "max": 87.40853881835938, "pos_frac": 0.90625, "sample": [11.290702819824219, 81.72240447998047, 36.191131591796875, 30.434158325195312, 20.358726501464844, 21.601844787597656, 8.89471435546875, 1.986703872680664, 18.86731719970703, 9.499637603759766, 37.059425354003906, 12.093154907226562, 26.424224853515625, 71.30252838134766, 59.15666961669922, 36.70539093017578, 20.12432861328125, 20.020950317382812, 39.768310546875, 26.937667846679688, -30.669967651367188, 12.423515319824219, 45.783599853515625, 64.01835632324219, 6.994377136230469, 2.8235702514648438, 39.562156677246094, 51.779701232910156, 52.38744354248047, -22.352405548095703, 22.903057098388672, 5.7494354248046875, 7.699159622192383, 11.402292251586914, 87.40853881835938, -1.547262191772461, 33.002410888671875, 67.99908447265625, 49.70671081542969, 49.091651916503906, 29.55048370361328, -0.911834716796875, -8.143836975097656, -1.6460113525390625, 48.93067169189453, 26.22021484375, 9.123779296875, 9.732872009277344, 40.22196578979492, 46.82206726074219, 65.49343872070312, 44.627838134765625, 24.88995361328125, 8.133392333984375, 12.764541625976562, 38.65013122558594, 31.741287231445312, 82.56492614746094, 27.417200088500977, 60.857391357421875, 45.19847106933594, 30.754135131835938, 58.6668815612793, 10.77032470703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000524.npy"}
|
|
{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 35.2094841003418, "std": 23.788013458251953, "min": -5.002815246582031, "p10": 7.017336654663087, "median": 31.00523090362549, "p90": 76.61958923339844, "max": 88.61187744140625, "pos_frac": 0.953125, "sample": [29.907413482666016, 25.3641357421875, 28.96148681640625, 31.161117553710938, -3.176126480102539, 61.33103942871094, 58.74254608154297, 43.390037536621094, 80.09185791015625, 39.0642204284668, 6.667205810546875, 13.867786407470703, 5.5664825439453125, 50.256568908691406, 35.67437744140625, 5.894073486328125, 14.146682739257812, 6.031402587890625, 59.261688232421875, 26.802261352539062, 53.64178466796875, 10.703025817871094, 11.649589538574219, 54.603179931640625, 39.14967346191406, 25.48839569091797, 12.395980834960938, 45.388099670410156, 35.59294128417969, 31.314483642578125, 74.2445068359375, -5.002815246582031, 47.894287109375, 23.623661041259766, 53.30787658691406, 43.67205810546875, 16.516590118408203, 53.81256866455078, 19.371978759765625, 27.938186645507812, 77.93177795410156, 25.56402587890625, 20.269821166992188, -2.049955368041992, 26.957504272460938, 34.19459533691406, 38.596405029296875, 15.203372955322266, 37.147159576416016, 29.630638122558594, 71.78121948242188, 77.63748168945312, 11.876426696777344, 23.591293334960938, 88.61187744140625, 30.84934425354004, 34.47564697265625, 19.589149475097656, 13.132377624511719, 32.662193298339844, 79.9976806640625, 78.21546173095703, 7.834308624267578, 85.39485168457031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000525.npy"}
|
|
{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 34.531463623046875, "std": 27.9222412109375, "min": -19.695327758789062, "p10": -0.11722240447997345, "median": 31.96575927734375, "p90": 75.35458602905274, "max": 109.80825805664062, "pos_frac": 0.890625, "sample": [96.00753784179688, 89.79115295410156, 19.02224349975586, 13.585060119628906, 42.29460144042969, 10.949722290039062, -9.695215225219727, -3.3235034942626953, 41.95570373535156, -19.695327758789062, 50.717018127441406, 39.63258361816406, 40.172950744628906, 16.27271842956543, 49.77069854736328, 43.87841033935547, 23.99226188659668, 59.59393310546875, 31.181594848632812, 72.58299255371094, 6.974340438842773, 32.74992370605469, 15.84733772277832, 43.66344451904297, 85.42971801757812, 22.16615104675293, 109.80825805664062, 22.391586303710938, 43.382171630859375, 11.487977981567383, -3.156463623046875, 59.03538131713867, 18.62884521484375, 12.346549987792969, -6.47808837890625, 13.744720458984375, 47.488365173339844, 8.490917205810547, 14.119140625, 25.928417205810547, 41.596038818359375, 39.55218505859375, 52.66606140136719, 11.388824462890625, 20.37963104248047, 18.738845825195312, 60.444496154785156, 69.63782501220703, 22.795055389404297, 49.830074310302734, 23.078033447265625, 38.86381530761719, 17.9193115234375, 77.60357666015625, 52.555213928222656, 57.39041519165039, 76.54241180419922, 26.29570770263672, 34.27766418457031, 9.937202453613281, -6.783454895019531, 53.859107971191406, -12.138412475585938, 80.84825897216797], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000526.npy"}
|
|
{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 32.13435363769531, "std": 28.15311050415039, "min": -17.984817504882812, "p10": 1.0183830261230498, "median": 26.386987686157227, "p90": 70.40259170532227, "max": 110.39907836914062, "pos_frac": 0.890625, "sample": [33.72443389892578, 55.557952880859375, 49.68627166748047, 57.820648193359375, 40.50361633300781, 29.159347534179688, 89.32999420166016, -3.5069007873535156, 29.47430419921875, 26.093326568603516, 36.07850646972656, -2.8073654174804688, 89.66126251220703, 11.51995849609375, 25.812049865722656, 3.9179153442382812, 67.19979095458984, -1.57574462890625, 6.772834777832031, 34.622528076171875, 3.95556640625, 33.920257568359375, 21.815364837646484, 9.211681365966797, 30.05515480041504, 69.75870513916016, -7.403034210205078, 16.117259979248047, 11.542999267578125, 35.70021057128906, 8.384544372558594, 41.86907196044922, 46.199859619140625, 50.99008560180664, 110.39907836914062, 55.71080017089844, 87.734619140625, 13.758779525756836, 24.702896118164062, 9.83807373046875, 62.852203369140625, 18.89128875732422, 99.17973327636719, 24.724334716796875, -11.935592651367188, 28.32994842529297, 31.352394104003906, 19.265167236328125, 18.608551025390625, 5.590789794921875, 39.16834259033203, 15.017974853515625, 22.11972427368164, 23.096946716308594, 26.680648803710938, 23.211776733398438, 92.97511291503906, 29.66248893737793, 21.306419372558594, 46.967281341552734, 13.756988525390625, -17.984817504882812, -0.224273681640625, 70.67854309082031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000527.npy"}
|
|
{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 30.600479125976562, "std": 25.045381546020508, "min": -9.9874267578125, "p10": 0.1583999633789076, "median": 27.308632850646973, "p90": 65.95662612915041, "max": 95.4383544921875, "pos_frac": 0.890625, "sample": [3.0584716796875, 51.93562316894531, 61.80455780029297, 27.67460823059082, 20.63812255859375, 2.7137451171875, 1.9261932373046875, 69.02778625488281, -9.9874267578125, 45.19068908691406, 25.984359741210938, 25.044960021972656, 26.942657470703125, 16.57571792602539, 35.89222717285156, 37.33234405517578, 31.740272521972656, 29.716140747070312, 67.736083984375, 84.713134765625, -0.7147121429443359, 46.3836669921875, 21.323959350585938, 21.418472290039062, 2.108236312866211, 95.4383544921875, 41.27909851074219, 93.52047729492188, -2.0660552978515625, 41.51941680908203, 10.958549499511719, 24.793533325195312, 18.69635009765625, 1.5425224304199219, 29.269363403320312, 41.60462188720703, -1.2180404663085938, 57.49029541015625, 40.78705596923828, 6.7350616455078125, 26.10332679748535, 43.29682159423828, 17.403701782226562, 37.431549072265625, 42.77100372314453, 22.55213165283203, 36.7705078125, 49.63909912109375, 28.034709930419922, 69.00437927246094, -3.1370849609375, 41.66365051269531, -6.653251647949219, 58.19956970214844, 14.087797164916992, 85.00856018066406, 38.160037994384766, 2.5924415588378906, -0.4347953796386719, 13.317474365234375, 19.348617553710938, 7.031749725341797, 53.48262023925781, 16.22571563720703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000528.npy"}
|
|
{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 27.404434204101562, "std": 28.750995635986328, "min": -49.11228942871094, "p10": -3.0843292236328113, "median": 23.463945388793945, "p90": 58.57079963684083, "max": 106.96015167236328, "pos_frac": 0.859375, "sample": [-1.600738525390625, 4.394256591796875, -4.405326843261719, 13.03936767578125, 51.914154052734375, 23.499691009521484, 31.556808471679688, 17.414283752441406, 1.2264785766601562, 106.96015167236328, 18.45247459411621, 20.35495376586914, 20.04534912109375, 32.195579528808594, 37.44682693481445, 8.336599349975586, 37.486534118652344, 50.996673583984375, 13.761482238769531, 6.105949401855469, 13.716575622558594, 28.851394653320312, 100.8612060546875, 70.07234191894531, 35.68376159667969, -25.505149841308594, 23.428199768066406, 41.265480041503906, 15.048072814941406, 35.108360290527344, 35.63536834716797, 1.4837284088134766, 29.333772659301758, 50.02958679199219, 22.671493530273438, -8.864431381225586, 50.883087158203125, 59.5284309387207, 74.34938049316406, 51.629302978515625, 22.430618286132812, 36.846214294433594, -3.72015380859375, -49.11228942871094, 41.39186096191406, -16.69021987915039, 26.782503128051758, 19.372894287109375, 52.43528747558594, 13.659996032714844, 52.767547607421875, 32.02996063232422, 3.991914749145508, 19.772701263427734, 20.766799926757812, -0.09523773193359375, 1.6910934448242188, 97.35784912109375, 52.119659423828125, 56.336326599121094, 1.0880203247070312, -13.860809326171875, 25.89522933959961, 66.23434448242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000529.npy"}
|
|
{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 36.536338806152344, "std": 27.414752960205078, "min": -22.098388671875, "p10": -1.9705184936523399, "median": 37.732479095458984, "p90": 71.42420349121095, "max": 100.0181884765625, "pos_frac": 0.890625, "sample": [71.85648345947266, 47.77762985229492, 20.56371307373047, 36.714149475097656, 52.57328796386719, 17.077383041381836, 69.8823471069336, 71.72711944580078, 14.991962432861328, 18.357681274414062, 44.032310485839844, -3.5865249633789062, 42.96722412109375, 32.05108642578125, 70.71739959716797, 42.33134460449219, 15.630189895629883, 50.05388641357422, 33.153541564941406, 40.27056884765625, 24.2314453125, 27.093345642089844, 93.94039916992188, 32.18596649169922, 28.480056762695312, 42.81988525390625, -22.098388671875, 76.60547637939453, 40.01378631591797, 53.61570739746094, 1.8001632690429688, 37.942344665527344, 100.0181884765625, 48.09059143066406, 45.6724853515625, -14.599796295166016, 42.70094299316406, 36.154335021972656, 20.8583984375, -10.593090057373047, 15.191225051879883, 42.763214111328125, 53.295555114746094, 82.22259521484375, -6.818708419799805, 16.84410858154297, 63.54017639160156, -16.644203186035156, 93.95512390136719, 21.82166290283203, 58.86892318725586, 6.336568832397461, 65.93525695800781, -9.498855590820312, 59.1153564453125, 15.481529235839844, 19.895889282226562, 37.522613525390625, 57.17867660522461, 55.99919891357422, 25.003490447998047, 45.99047088623047, 28.55023956298828, 11.70077133178711], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000530.npy"}
|
|
{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 32.21106719970703, "std": 27.86233901977539, "min": -27.6522216796875, "p10": 1.2423950195312523, "median": 28.706939697265625, "p90": 70.45309066772461, "max": 106.68296813964844, "pos_frac": 0.90625, "sample": [44.04728698730469, -6.845888137817383, 24.46746826171875, 58.59044647216797, 36.389183044433594, -20.335357666015625, 5.977151870727539, 21.934616088867188, 59.63849639892578, 30.925216674804688, 11.971467971801758, 88.26051330566406, 0.3137493133544922, 25.538414001464844, 39.01850128173828, 12.875865936279297, 48.92529296875, 16.26422882080078, 6.4905242919921875, 24.516036987304688, 70.2763442993164, 46.22055435180664, 35.42039489746094, 28.300430297851562, 62.850791931152344, 43.71312713623047, 31.62911605834961, 43.92218017578125, 49.77124786376953, 8.426597595214844, 39.874107360839844, 28.811622619628906, 50.01490020751953, 15.722946166992188, 3.4092350006103516, 19.653076171875, 3.5113143920898438, 27.912841796875, -3.0337181091308594, 62.351341247558594, 28.602256774902344, 106.68296813964844, 70.63509368896484, 4.859100341796875, 90.7366943359375, -27.6522216796875, 34.720855712890625, 24.549850463867188, 75.66561889648438, 53.08311462402344, 27.579818725585938, 86.92403411865234, 70.52883911132812, 24.010883331298828, 48.4167366027832, 4.969608306884766, 41.071510314941406, 5.4031829833984375, -9.007293701171875, 15.402137756347656, 7.806358337402344, 44.8713493347168, 44.52778625488281, -10.601724624633789], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000531.npy"}
|
|
{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 38.08259582519531, "std": 29.897003173828125, "min": -8.957740783691406, "p10": 2.6689653396606445, "median": 36.68658447265625, "p90": 78.51866760253907, "max": 154.45474243164062, "pos_frac": 0.921875, "sample": [74.22850036621094, 46.843055725097656, 63.39116668701172, 46.650550842285156, 98.34963989257812, 62.04400634765625, 80.5245361328125, 56.594879150390625, -4.122474670410156, 33.959476470947266, 10.673015594482422, 36.780738830566406, 38.14899444580078, 35.333900451660156, 21.164016723632812, 38.20098876953125, 93.02200317382812, -6.769996643066406, 50.170684814453125, 58.10376739501953, 41.79322814941406, 79.37368774414062, 76.52362060546875, 79.48419952392578, 69.84562683105469, 45.299224853515625, 6.274543762207031, 18.908180236816406, 82.50967407226562, -3.8324432373046875, 51.033348083496094, 2.8354110717773438, 10.285888671875, 45.91473388671875, 20.614656448364258, 2.5976314544677734, 42.571861267089844, 29.206268310546875, 47.99256134033203, 33.918731689453125, -8.957740783691406, 63.11981964111328, 3.4482498168945312, 37.249053955078125, 42.24516296386719, 17.853057861328125, 29.25334930419922, 50.40597915649414, 4.465354919433594, -3.2153167724609375, 9.86297607421875, 10.987207412719727, 30.898849487304688, 154.45474243164062, 45.27617645263672, 24.207984924316406, 36.592430114746094, 21.061012268066406, 31.11175537109375, 33.866275787353516, 13.571510314941406, 1.728179931640625, 40.04132080078125, 31.31664276123047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000532.npy"}
|
|
{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 26.412456512451172, "std": 24.175979614257812, "min": -34.97151184082031, "p10": -2.6069808959960934, "median": 24.64776039123535, "p90": 63.027478027343754, "max": 76.78076934814453, "pos_frac": 0.859375, "sample": [-2.7656097412109375, -9.005350112915039, 76.78076934814453, 27.771469116210938, -0.993408203125, 23.95557403564453, 37.96104431152344, 72.83106994628906, 25.15935516357422, 19.17119598388672, 62.52410888671875, 26.681373596191406, 60.24962615966797, 7.152629852294922, -9.348861694335938, 36.48747253417969, 65.35435485839844, 38.807777404785156, -7.270757675170898, 17.20260238647461, 8.894577026367188, 22.60516357421875, 32.103370666503906, 62.69482421875, 36.532379150390625, 66.6701889038086, 27.015195846557617, -34.97151184082031, 28.392902374267578, 66.11408996582031, 16.936737060546875, 51.137176513671875, 35.475650787353516, 15.449668884277344, 24.772136688232422, 10.738578796386719, 36.478668212890625, 26.088764190673828, 8.878936767578125, 8.225414276123047, 5.221202850341797, 3.222827911376953, 71.13175964355469, 55.60554504394531, -19.2108154296875, 43.03257751464844, 16.10077667236328, -2.236846923828125, 23.17212677001953, 8.0181884765625, 9.996461868286133, 28.731700897216797, 29.915786743164062, 7.5946807861328125, 19.075462341308594, 12.965507507324219, -5.147871017456055, 16.696762084960938, 45.705387115478516, 63.1700439453125, 22.904130935668945, 24.52338409423828, 49.000022888183594, 42.269142150878906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000533.npy"}
|
|
{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 32.42583465576172, "std": 27.588502883911133, "min": -13.564750671386719, "p10": 2.9154197692871096, "median": 25.40847396850586, "p90": 74.13613815307619, "max": 104.756103515625, "pos_frac": 0.9375, "sample": [35.90369415283203, 45.63658142089844, 2.8851776123046875, 76.07618713378906, 17.89703369140625, 23.091476440429688, 9.98583984375, 66.25714111328125, 18.51009750366211, 29.204923629760742, 8.35622787475586, 48.444549560546875, 2.9859848022460938, 11.495803833007812, 53.976829528808594, 104.756103515625, 49.030059814453125, 45.69352722167969, 45.31275177001953, 83.5997543334961, 7.195146560668945, -2.4904823303222656, 21.321380615234375, 25.93780517578125, 50.21912384033203, 56.66427230834961, 101.84442138671875, 15.520675659179688, 5.24749755859375, 16.73456573486328, 14.384653091430664, 42.05938720703125, 12.379362106323242, 18.56079864501953, 19.051868438720703, -13.564750671386719, 8.903579711914062, 69.4654541015625, 87.14856719970703, 71.1523208618164, 39.626922607421875, 25.287948608398438, 20.391626358032227, 32.36701965332031, 84.82392883300781, 25.52899932861328, 30.159366607666016, 2.3944053649902344, 12.09893798828125, 64.88374328613281, 75.4149169921875, -5.399290084838867, 28.336044311523438, 28.13985824584961, 41.006229400634766, 15.97052001953125, 4.728752136230469, 22.812175750732422, 48.64522933959961, 5.304231643676758, 51.62034606933594, -1.8844985961914062, 0.30666351318359375, 15.853973388671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000534.npy"}
|
|
{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 35.92974090576172, "std": 27.33642578125, "min": -18.123825073242188, "p10": 3.8485309600830084, "median": 36.855831146240234, "p90": 70.82077484130859, "max": 107.7199935913086, "pos_frac": 0.921875, "sample": [3.6503067016601562, 42.636474609375, 23.866607666015625, 9.488265991210938, 23.845489501953125, 13.161308288574219, 36.77162170410156, 95.62340545654297, 41.28668212890625, 14.554412841796875, 16.672168731689453, 57.76005554199219, 96.75413513183594, 82.87493133544922, 23.175094604492188, -3.7380104064941406, -8.8367919921875, 71.09112548828125, 36.44209289550781, 39.7752685546875, 73.72750091552734, 39.519439697265625, 11.687690734863281, 55.82115173339844, 51.775604248046875, 65.97765350341797, 27.624418258666992, 50.21654510498047, 7.259500503540039, 45.343414306640625, 51.45880126953125, 30.83940887451172, 55.00798034667969, 70.18995666503906, 41.009437561035156, 3.1558761596679688, 65.7889633178711, 44.88360595703125, 15.94122314453125, 41.59717559814453, 40.89717102050781, 81.49357604980469, 28.772998809814453, 64.6003189086914, 38.38934326171875, 47.83696746826172, 30.059364318847656, 61.030487060546875, 8.351211547851562, -14.600906372070312, 41.08202362060547, 21.20221710205078, 18.411231994628906, 4.311054229736328, 8.377105712890625, -18.123825073242188, 47.34960174560547, -0.6431808471679688, 107.7199935913086, 21.403413772583008, 4.332630157470703, 36.940040588378906, 32.56575012207031, 22.064743041992188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000535.npy"}
|
|
{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 40.71318817138672, "std": 27.575429916381836, "min": -5.480587005615234, "p10": 11.2330623626709, "median": 33.35405921936035, "p90": 77.01202926635743, "max": 114.6337890625, "pos_frac": 0.96875, "sample": [30.313512802124023, 0.7311630249023438, 45.749351501464844, 59.66990661621094, 109.31559753417969, 50.77240753173828, 27.246078491210938, 21.808807373046875, -5.480587005615234, 27.13054656982422, 15.691238403320312, 63.31812286376953, 20.93307113647461, 77.22793579101562, 22.199451446533203, 76.50824737548828, 72.64897155761719, 10.599641799926758, 78.8022232055664, 69.82173156738281, 26.929039001464844, 18.060501098632812, 29.800819396972656, 26.767932891845703, 71.8524398803711, 104.21029663085938, 9.615795135498047, 55.47593688964844, 49.908531188964844, -3.138580322265625, 114.6337890625, 59.23161315917969, 61.781890869140625, 43.22020721435547, 39.9696044921875, 25.93508529663086, 24.18402099609375, 97.36709594726562, 12.470989227294922, 10.702522277832031, 28.947769165039062, 17.756378173828125, 43.294898986816406, 32.053253173828125, 37.66725158691406, 6.308134078979492, 15.750350952148438, 35.63766860961914, 34.51679992675781, 47.753753662109375, 23.54520034790039, 13.339580535888672, 18.671035766601562, 48.551979064941406, 42.43849182128906, 86.88600158691406, 21.614479064941406, 32.53109359741211, 14.207881927490234, 29.85645294189453, 74.97969055175781, 58.46092987060547, 34.177024841308594, 52.71092224121094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000536.npy"}
|
|
{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 33.64003372192383, "std": 28.76498794555664, "min": -11.466026306152344, "p10": 0.9455032348632819, "median": 24.396183013916016, "p90": 73.92685623168946, "max": 104.32255554199219, "pos_frac": 0.921875, "sample": [-6.627662658691406, 27.498733520507812, 0.4426593780517578, -7.337984085083008, 8.599929809570312, 36.508514404296875, 12.063398361206055, 72.08287811279297, 20.799148559570312, 23.875019073486328, 45.495269775390625, 0.6831817626953125, 27.546127319335938, 9.07568359375, -11.466026306152344, 44.2125244140625, 85.932373046875, 2.289487838745117, 58.63697052001953, 11.919754028320312, 4.86723518371582, 67.28589630126953, 24.48308563232422, 8.745399475097656, 16.68524169921875, 17.822898864746094, 84.76567077636719, 28.165138244628906, 16.628917694091797, 26.002351760864258, 74.71713256835938, 57.64167785644531, 104.32255554199219, 14.979019165039062, 66.1947250366211, 71.01619720458984, 5.650690078735352, 66.43203735351562, 44.671043395996094, 79.26109313964844, 1.557586669921875, 80.79141235351562, 24.309280395507812, -6.57342529296875, 46.70125198364258, 24.119796752929688, 17.78281021118164, 16.499988555908203, 22.12360382080078, 93.00221252441406, 17.39771270751953, 63.76561737060547, 6.508995056152344, 22.03272247314453, 71.37174987792969, 13.554443359375, 60.15190124511719, 18.441205978393555, 39.574676513671875, 35.84727096557617, 31.921390533447266, 65.29815673828125, -0.20119667053222656, 44.415016174316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000537.npy"}
|
|
{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 32.44498062133789, "std": 25.140748977661133, "min": -15.597671508789062, "p10": 4.549973297119142, "median": 29.616347312927246, "p90": 67.47978897094728, "max": 111.19149780273438, "pos_frac": 0.953125, "sample": [44.22398376464844, 13.866546630859375, 16.06127166748047, 111.19149780273438, 72.41499328613281, 2.3616943359375, 33.054141998291016, -14.937461853027344, 31.196571350097656, 6.088203430175781, 40.51868438720703, 20.099517822265625, 58.866600036621094, 23.83740234375, 25.037220001220703, 23.970611572265625, 31.319900512695312, 16.73265838623047, 11.299957275390625, 78.25968933105469, 30.338088989257812, -1.2337417602539062, 23.253433227539062, 63.24082946777344, 39.51457595825195, 19.680419921875, 25.046295166015625, 91.67692565917969, 12.028411865234375, 15.148780822753906, 19.043716430664062, 79.37596893310547, 37.35023880004883, -15.597671508789062, 25.676055908203125, 29.08599281311035, 0.2104473114013672, 62.130126953125, 32.26249694824219, 22.985610961914062, 25.14177131652832, 17.96868133544922, 15.28536605834961, 59.72735595703125, 69.20703887939453, 84.32798767089844, 30.14670181274414, 45.81254577636719, 9.65019416809082, 12.574256896972656, 38.372467041015625, 40.91889572143555, 37.04413604736328, 10.914241790771484, 31.311891555786133, 24.720664978027344, 63.44953918457031, 31.117084503173828, 62.229339599609375, 38.554412841796875, 30.28466796875, 3.8907318115234375, 34.545806884765625, 2.602384567260742], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000538.npy"}
|
|
{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 31.052791595458984, "std": 27.054018020629883, "min": -23.92156219482422, "p10": 2.5589544296264655, "median": 25.837346076965332, "p90": 66.61943359375, "max": 105.99625396728516, "pos_frac": 0.90625, "sample": [56.409339904785156, 43.720733642578125, 3.0064773559570312, 11.501852035522461, 11.037487030029297, 48.23390197753906, -11.045583724975586, 36.440765380859375, 6.2935791015625, 32.228172302246094, 21.546615600585938, 2.367158889770508, 15.301361083984375, 29.357742309570312, 65.21063232421875, 30.504196166992188, 63.59192657470703, 30.567245483398438, 20.6702880859375, 84.76534271240234, 20.897781372070312, 26.7406005859375, 81.20069885253906, 65.35626220703125, 26.35123634338379, -23.92156219482422, 20.31885528564453, 67.46067810058594, 31.484689712524414, 7.8818511962890625, 4.858642578125, 9.557518005371094, 14.538909912109375, 76.970947265625, 21.569183349609375, 10.426048278808594, 13.410247802734375, 58.931732177734375, 19.909324645996094, 6.927055358886719, -8.818981170654297, 105.99625396728516, 4.459228515625, 55.1771354675293, 65.62303161621094, -1.7929401397705078, 66.58604431152344, 25.323455810546875, 50.42359924316406, -14.482994079589844, 22.907085418701172, -2.7170944213867188, 72.87860107421875, 43.84925842285156, 59.239990234375, 21.581069946289062, 66.63374328613281, 12.68951416015625, 35.09916687011719, 35.64848327636719, 17.563146591186523, 44.45440673828125, 28.668357849121094, 17.80902671813965], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000539.npy"}
|
|
{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 35.191566467285156, "std": 28.06586265563965, "min": -22.67938232421875, "p10": 3.350523376464848, "median": 32.72922897338867, "p90": 68.78761978149414, "max": 100.15664672851562, "pos_frac": 0.90625, "sample": [32.70149230957031, 32.75696563720703, 73.88893127441406, 28.41002655029297, 49.71864318847656, 15.885337829589844, 24.26031494140625, -14.109954833984375, -22.67938232421875, -16.997344970703125, -18.734956741333008, 40.04216003417969, 25.916885375976562, 27.725616455078125, 66.16903686523438, 67.32860565185547, 36.454856872558594, 10.276641845703125, 32.61058044433594, 99.67068481445312, 33.463958740234375, 15.518592834472656, 27.998374938964844, 66.36007690429688, 42.00422668457031, 37.328399658203125, 53.79393005371094, 97.41559600830078, 50.12868881225586, 42.686676025390625, 23.58917236328125, 54.45295333862305, 11.522912979125977, 21.814796447753906, 11.283515930175781, 37.18767547607422, 18.428123474121094, 11.953689575195312, 68.66661834716797, 47.30719757080078, 7.567047119140625, 89.48265075683594, 59.75132751464844, 100.15664672851562, -0.8368949890136719, 70.4180908203125, 20.83111572265625, 61.815399169921875, 38.694496154785156, 33.688720703125, 8.760498046875, 39.18528747558594, 63.917327880859375, 20.29559326171875, 18.820106506347656, 20.73284912109375, 57.012054443359375, 20.241472244262695, 49.906494140625, 15.241832733154297, -10.646507263183594, 68.8394775390625, 32.641456604003906, 1.5434417724609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000540.npy"}
|
|
{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 30.185348510742188, "std": 24.248565673828125, "min": -15.55767822265625, "p10": -0.053060913085936356, "median": 26.09178924560547, "p90": 63.64888992309572, "max": 88.49957275390625, "pos_frac": 0.890625, "sample": [65.48184204101562, 40.743812561035156, 46.312828063964844, 66.20816040039062, 14.82080078125, 20.514511108398438, 40.93858337402344, 18.486087799072266, 28.498153686523438, 14.35772705078125, -4.073150634765625, 87.73865509033203, -11.129341125488281, 39.79731750488281, 23.541473388671875, 31.250411987304688, 41.607879638671875, 88.49957275390625, 28.86614227294922, 10.612869262695312, 84.61431884765625, 59.37200164794922, 24.532699584960938, 10.572097778320312, 25.972259521484375, 11.441154479980469, -10.886756896972656, 10.384529113769531, -9.165611267089844, 80.55709838867188, 21.057403564453125, 21.593460083007812, 29.665252685546875, 17.112991333007812, 1.0886154174804688, 29.343223571777344, 34.60917663574219, 46.8374137878418, 54.762062072753906, 10.743270874023438, 14.18048095703125, 35.83576202392578, 17.338760375976562, 55.45426940917969, 21.229721069335938, 70.79302215576172, 30.25869369506836, 16.09893798828125, 47.89363098144531, 43.6866455078125, -0.5423507690429688, 39.428192138671875, 15.03570556640625, 55.95393371582031, 54.521240234375, 24.396976470947266, -0.821044921875, 22.95410919189453, -15.55767822265625, 15.298370361328125, 11.12237548828125, 26.211318969726562, 28.312103271484375, 55.4981803894043], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000541.npy"}
|
|
{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 34.35010528564453, "std": 25.037315368652344, "min": -16.164527893066406, "p10": 1.9164722442626965, "median": 34.52328109741211, "p90": 71.85996322631836, "max": 88.80181884765625, "pos_frac": 0.9375, "sample": [25.74597930908203, 45.83019256591797, 75.97552490234375, 20.481788635253906, 0.7224349975585938, 81.22218322753906, 22.801145553588867, 71.259033203125, 76.0982666015625, 12.23004150390625, 61.592613220214844, 81.45332336425781, 31.988208770751953, 1.4343109130859375, 40.6661376953125, 25.920324325561523, 88.80181884765625, 56.69621276855469, 0.408416748046875, 28.73413848876953, 9.815444946289062, 35.250396728515625, 33.796165466308594, 24.602802276611328, 9.2352294921875, 44.6048583984375, 21.013168334960938, 41.148597717285156, 4.06939697265625, 54.882423400878906, 5.615814208984375, 47.7139892578125, 71.71238708496094, -2.8453826904296875, 52.804595947265625, 3.332172393798828, 40.945777893066406, 30.963890075683594, 3.041515350341797, 37.04716873168945, 24.83255386352539, 44.825408935546875, 22.24614906311035, -16.164527893066406, 31.361618041992188, 39.54107666015625, -4.17448616027832, 47.425994873046875, 73.75437927246094, 10.923524856567383, 41.47478103637695, 62.69877243041992, 26.235244750976562, 41.5941047668457, 24.65320587158203, 35.5037841796875, 6.99818229675293, -5.835615158081055, 71.92321014404297, 41.606773376464844, 17.158830642700195, 35.65245056152344, 37.795066833496094, 67.56369018554688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000542.npy"}
|
|
{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 25.586671829223633, "std": 25.67181396484375, "min": -24.753982543945312, "p10": -2.599276733398434, "median": 21.897674560546875, "p90": 57.759983062744155, "max": 109.81005859375, "pos_frac": 0.890625, "sample": [7.517768859863281, -5.558265686035156, -6.179679870605469, 15.142776489257812, 27.89271354675293, 72.55261993408203, 1.5431632995605469, 20.203140258789062, 3.5451507568359375, 47.778968811035156, 9.21624755859375, 5.81683349609375, 4.567432403564453, 17.77103042602539, 54.69617462158203, 43.94823455810547, 39.729766845703125, 101.99784851074219, -5.792171478271484, 1.0398025512695312, 29.114761352539062, 24.08759307861328, 14.451427459716797, 13.606435775756836, -24.753982543945312, 19.45415496826172, 11.03912353515625, 70.87643432617188, 39.443443298339844, 26.618072509765625, 33.8216552734375, 15.409591674804688, -8.425994873046875, 61.39507293701172, 38.03656005859375, -4.158882141113281, 109.81005859375, 35.39007568359375, 59.07304382324219, 10.298965454101562, 48.555450439453125, 26.404190063476562, 18.843563079833984, 14.571144104003906, 26.8255615234375, 13.818107604980469, 35.333744049072266, 69.265380859375, -24.327110290527344, 1.4760894775390625, 29.10411834716797, 16.687049865722656, 44.58113479614258, 28.59178924560547, 36.32971954345703, 24.385887145996094, 23.592208862304688, 31.65100860595703, 3.7185516357421875, 19.174087524414062, 7.888208389282227, 42.174644470214844, 14.341573715209961, 52.543701171875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000543.npy"}
|
|
{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 32.87367248535156, "std": 23.992013931274414, "min": -16.739727020263672, "p10": 5.802971076965332, "median": 31.42905616760254, "p90": 59.252587890625, "max": 124.5787353515625, "pos_frac": 0.984375, "sample": [66.50517272949219, 22.608489990234375, 47.83511734008789, 45.927490234375, 124.5787353515625, 2.5254058837890625, 43.18980407714844, 24.716556549072266, 47.4545783996582, 6.054681777954102, 91.43521118164062, 10.261213302612305, 3.3886260986328125, 34.74676513671875, 30.044227600097656, 28.380939483642578, 15.076713562011719, 24.37744140625, 2.120361328125, 59.72343444824219, 11.06594467163086, 51.66602325439453, 12.571489334106445, 17.81616973876953, 45.98480987548828, 13.706535339355469, 39.48457336425781, 31.531787872314453, 19.69385528564453, 17.014535903930664, 58.15394592285156, 44.38923645019531, 33.91559600830078, 18.10796356201172, 34.6861572265625, 44.36857223510742, 20.001693725585938, 17.851835250854492, 23.44515609741211, 4.94694709777832, 28.596965789794922, 38.797489166259766, 45.274391174316406, 60.21722412109375, 49.201995849609375, 38.02132034301758, 45.8641357421875, 42.672019958496094, 14.179420471191406, 31.412723541259766, 6.242366790771484, 31.445388793945312, 30.139053344726562, 73.6917724609375, 58.01771545410156, 5.695095062255859, 42.69678497314453, 4.3859405517578125, 48.23207092285156, 9.830772399902344, 33.59619140625, 83.84347534179688, -16.739727020263672, 7.24647331237793], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000544.npy"}
|
|
{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 33.994972229003906, "std": 26.682092666625977, "min": -11.536384582519531, "p10": 2.5105293273925784, "median": 33.33341026306152, "p90": 64.93659896850586, "max": 114.582763671875, "pos_frac": 0.9375, "sample": [0.8010311126708984, -3.871337890625, -1.9139404296875, -11.536384582519531, 5.953090667724609, 55.25334548950195, 64.94009399414062, 6.670049667358398, 61.682373046875, 21.53089141845703, 32.6534538269043, 39.813499450683594, -2.5295333862304688, 114.582763671875, 65.30868530273438, 2.4387130737304688, 40.509674072265625, 79.60137176513672, 40.98596954345703, 44.04677963256836, 84.27633666992188, 39.4493293762207, 63.51728820800781, 4.199745178222656, 25.752395629882812, 5.934116363525391, 2.6781005859375, 45.79998779296875, 24.961959838867188, 4.676206588745117, 39.79405212402344, 29.798377990722656, 10.16731071472168, 19.803062438964844, 34.1630859375, 60.065513610839844, 49.77886962890625, 19.604957580566406, 14.33681869506836, 60.725616455078125, 7.332221984863281, 62.743553161621094, 71.6892318725586, 32.67189407348633, 6.207996368408203, 52.15132141113281, 36.641212463378906, 64.9284439086914, 64.04866027832031, 35.30126190185547, 11.892881393432617, 14.759628295898438, 18.075435638427734, 47.575416564941406, 49.33441162109375, 24.730117797851562, 83.65023803710938, 0.576385498046875, 52.7994499206543, 33.99492645263672, 29.726638793945312, 15.65069580078125, 3.6044235229492188, 59.18788146972656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000545.npy"}
|
|
{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 33.39392852783203, "std": 23.92932891845703, "min": -21.62310791015625, "p10": 4.795319747924806, "median": 28.33156108856201, "p90": 71.48172073364259, "max": 82.4500732421875, "pos_frac": 0.953125, "sample": [15.079681396484375, 14.552513122558594, 78.19349670410156, 24.853797912597656, 36.240867614746094, 46.7031364440918, 4.322254180908203, 27.961454391479492, 41.49315643310547, 20.89093017578125, 21.824710845947266, 10.568351745605469, 17.951934814453125, 41.06153106689453, 36.91754150390625, 17.833553314208984, 31.40186309814453, 43.308494567871094, 41.166160583496094, 3.6982955932617188, 5.899139404296875, 76.54070281982422, -4.543785095214844, 73.63453674316406, 32.82884216308594, 15.010360717773438, 56.91987991333008, 33.75254821777344, 81.49784088134766, 22.49634552001953, 7.0110626220703125, 11.856491088867188, 26.20693588256836, 55.77913284301758, 28.70166778564453, 2.2590179443359375, 58.50156784057617, 16.75390625, 69.59215545654297, 82.4500732421875, -21.62310791015625, 50.950836181640625, 63.4259033203125, 24.221717834472656, 35.27630615234375, 59.15608215332031, -2.5178489685058594, 72.29153442382812, 49.500213623046875, 36.90573501586914, 22.652137756347656, 53.61293029785156, 27.295467376708984, 77.29606628417969, 25.841989517211914, 42.51581573486328, 18.884315490722656, 3.4581336975097656, 8.221134185791016, 25.177902221679688, 14.53892707824707, 63.47605895996094, 39.023048400878906, 18.457870483398438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000546.npy"}
|
|
{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 30.772075653076172, "std": 30.483257293701172, "min": -37.167205810546875, "p10": -1.0118959426879854, "median": 29.83831787109375, "p90": 61.587267303466795, "max": 124.95426940917969, "pos_frac": 0.890625, "sample": [2.175027847290039, 53.06367492675781, -37.167205810546875, 104.92915344238281, 9.777111053466797, 12.902984619140625, 47.04048156738281, 21.687110900878906, 10.14166259765625, 61.092254638671875, 26.584503173828125, 119.64968872070312, 34.630126953125, 124.95426940917969, 36.18603515625, -12.136039733886719, 72.58563232421875, 34.260658264160156, -13.203544616699219, 13.211250305175781, 18.469078063964844, 16.96912384033203, 61.799415588378906, 40.99213790893555, 30.247314453125, 10.780773162841797, 74.8111572265625, 56.38539123535156, 35.180908203125, 33.710975646972656, 84.92926788330078, 34.91565704345703, 5.1950225830078125, 55.534568786621094, 24.056209564208984, 5.755941390991211, 4.685035705566406, 1.754852294921875, 29.4293212890625, -8.187286376953125, 41.95697021484375, 42.55064392089844, 43.57783126831055, 27.827590942382812, 28.358009338378906, 33.8408088684082, 6.155803680419922, 58.154541015625, 5.188650131225586, 55.71806335449219, 23.727767944335938, 6.9276123046875, 40.56224060058594, 36.92570495605469, 3.257291793823242, 30.739852905273438, 39.07025146484375, 18.0440673828125, 52.800254821777344, -4.9204864501953125, 49.19349670410156, -12.11822509765625, 4.292093276977539, -2.1976451873779297], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000547.npy"}
|
|
{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 34.18475341796875, "std": 28.600631713867188, "min": -19.029037475585938, "p10": 0.1901634216308598, "median": 32.903873443603516, "p90": 66.87145919799806, "max": 131.13461303710938, "pos_frac": 0.90625, "sample": [45.61798095703125, 30.672943115234375, 23.298080444335938, 37.98969650268555, 35.39631271362305, 0.6137619018554688, -13.173660278320312, 107.61024475097656, 63.52149200439453, 43.398048400878906, 41.89598846435547, 23.20539093017578, 32.015602111816406, 60.6612548828125, 47.5472412109375, -19.029037475585938, 32.26862335205078, 59.134521484375, 40.949546813964844, 14.210577011108398, 48.81835174560547, 83.65248107910156, 46.82183837890625, 22.905715942382812, 38.20994567871094, 7.805091857910156, 38.976348876953125, 11.681665420532227, 35.970497131347656, 30.419357299804688, -1.2583770751953125, 131.13461303710938, 51.786407470703125, 26.752044677734375, 35.00614929199219, -2.3835887908935547, 6.0411224365234375, 68.93998718261719, 2.965057373046875, 24.518447875976562, 39.66659164428711, -4.321414947509766, 5.925689697265625, 26.90778350830078, 6.239095687866211, 10.828872680664062, 16.685989379882812, 13.645294189453125, 40.232261657714844, -10.096328735351562, 46.87391662597656, 68.30715942382812, 0.0086212158203125, 103.3177490234375, 25.559066772460938, 28.59915542602539, 35.18074035644531, 81.96029663085938, 30.34716796875, 51.013954162597656, 11.849128723144531, 33.53912353515625, 60.28289031982422, 48.70368576049805], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000548.npy"}
|
|
{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 34.86151123046875, "std": 25.489124298095703, "min": -18.564903259277344, "p10": 4.038794708251956, "median": 32.99834442138672, "p90": 65.66659469604492, "max": 111.13858032226562, "pos_frac": 0.921875, "sample": [32.84370422363281, 111.13858032226562, 22.024703979492188, 2.762451171875, 59.64579772949219, 74.16119384765625, 33.31620788574219, 67.67070770263672, 51.98634338378906, 46.10649871826172, -1.7220191955566406, 33.17811584472656, 38.03430938720703, 16.380874633789062, 15.777580261230469, 9.96466064453125, 52.679710388183594, -1.9837589263916016, -1.104583740234375, 38.00633239746094, 44.628379821777344, 29.600507736206055, 0.15621185302734375, 59.967620849609375, 46.32416915893555, 20.652729034423828, 23.06128692626953, 7.016929626464844, 59.11322784423828, 41.439697265625, 9.926193237304688, 9.587047576904297, 46.691062927246094, 38.76014709472656, 28.21752166748047, 29.192344665527344, 7.233526229858398, 66.38616943359375, 17.828285217285156, 18.418941497802734, -0.30454254150390625, 29.569164276123047, 63.987586975097656, 32.333683013916016, 104.92596435546875, 37.79817199707031, 31.725296020507812, 57.67111587524414, 49.412200927734375, 53.34686279296875, 47.2646484375, -18.564903259277344, 17.618322372436523, 29.112716674804688, 55.313232421875, 11.566509246826172, 33.658912658691406, 23.420307159423828, 33.152984619140625, 61.932518005371094, 7.14898681640625, 68.34477233886719, 15.483642578125, 80.1491928100586], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000549.npy"}
|
|
{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 27.775787353515625, "std": 24.132396697998047, "min": -16.012706756591797, "p10": -1.8939859390258769, "median": 27.042296409606934, "p90": 63.41676292419434, "max": 86.10997772216797, "pos_frac": 0.890625, "sample": [29.80660057067871, 28.283658981323242, 23.923446655273438, 10.148880004882812, 15.67315673828125, 61.506988525390625, 30.091625213623047, 43.32746887207031, 16.34000015258789, 16.507606506347656, 62.24753189086914, 20.68408203125, 74.70690155029297, 33.6294059753418, 14.598983764648438, 14.004554748535156, 45.43895721435547, -16.012706756591797, 20.134246826171875, 38.6733283996582, -13.547210693359375, 4.754539489746094, 70.55899047851562, 16.906339645385742, 2.3245506286621094, 30.7919921875, 8.482498168945312, 48.35679244995117, 42.986454010009766, 25.800933837890625, -4.990264892578125, 17.50799560546875, 28.31922149658203, 1.0043563842773438, 36.39863586425781, 75.12651824951172, 40.57893371582031, 33.64130401611328, 86.10997772216797, 14.540088653564453, 48.05177688598633, 29.098472595214844, 6.561130523681641, -7.0459747314453125, 36.89198303222656, 40.20953369140625, 41.55126953125, 63.91786193847656, 3.6005401611328125, 43.83281326293945, -2.8059616088867188, 69.06498718261719, 0.23395729064941406, 48.03662109375, 2.6533889770507812, 16.333236694335938, 37.79798889160156, 59.91339111328125, 24.17253875732422, 1.5245361328125, -6.4642333984375, -3.097198486328125, 66.1610107421875, 8.089263916015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000550.npy"}
|
|
{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 35.135276794433594, "std": 26.755786895751953, "min": -9.91290283203125, "p10": 3.493674659729005, "median": 33.429561614990234, "p90": 72.58399429321292, "max": 103.28915405273438, "pos_frac": 0.9375, "sample": [22.30221939086914, 27.93878936767578, 39.63220977783203, 10.450035095214844, 52.683258056640625, 92.93846893310547, -3.797943115234375, 60.08882522583008, 43.00921630859375, 33.019493103027344, 36.01988220214844, 34.09882354736328, 2.7948455810546875, 85.77253723144531, 58.8712158203125, 60.281707763671875, 65.56343841552734, -7.4368896484375, 28.88408660888672, 84.037353515625, 25.198883056640625, 55.47758483886719, 62.18406677246094, 103.28915405273438, 41.246856689453125, 9.843032836914062, 75.59280395507812, 48.53614807128906, -3.8112926483154297, 64.81124877929688, 39.251502990722656, 7.630794525146484, 38.677886962890625, 23.998626708984375, 35.01081848144531, 38.49923324584961, 31.439529418945312, 33.839630126953125, 35.14935302734375, 1.0826396942138672, 8.309783935546875, 4.6519775390625, 16.300155639648438, 18.502227783203125, 8.89516830444336, 92.48091125488281, 10.794952392578125, 5.273616790771484, 65.13197326660156, 2.9972591400146484, 8.087455749511719, 25.970993041992188, 52.972816467285156, 26.168289184570312, 26.33061981201172, -9.91290283203125, 15.288963317871094, 35.49842071533203, 52.23723602294922, 25.819358825683594, 25.08028793334961, 81.34125518798828, 37.150054931640625, 19.18668556213379], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000551.npy"}
|
|
{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 39.607852935791016, "std": 31.18678092956543, "min": -5.563259124755859, "p10": 9.769074821472168, "median": 31.293537139892578, "p90": 72.88284912109376, "max": 152.21182250976562, "pos_frac": 0.96875, "sample": [27.160369873046875, 34.91820526123047, 9.663818359375, 73.44393920898438, 26.023414611816406, 90.34991455078125, 56.260986328125, 25.98980712890625, 39.742210388183594, 29.894088745117188, 60.038177490234375, 63.744171142578125, -5.563259124755859, 36.426666259765625, 71.57363891601562, 65.4028549194336, 43.162193298339844, 91.09326934814453, 134.3990020751953, 11.091148376464844, 19.010093688964844, 74.78755187988281, 40.015872955322266, 29.134567260742188, 7.369270324707031, 10.567150115966797, 13.348724365234375, 43.116249084472656, 0.10970115661621094, 13.063362121582031, 61.25775146484375, 7.588623046875, 31.80316162109375, 25.22644805908203, 22.320457458496094, 23.409446716308594, 59.40556335449219, 55.88927459716797, 152.21182250976562, 41.89508056640625, 42.083229064941406, 31.411399841308594, 43.53141784667969, 64.89720153808594, 67.27288818359375, 118.03335571289062, 28.406753540039062, 11.007293701171875, 63.0086669921875, 59.50054931640625, 9.629913330078125, 13.454761505126953, 13.141227722167969, 26.856403350830078, 15.215805053710938, 31.175674438476562, -3.4855194091796875, 35.17103576660156, 10.014673233032227, 15.390411376953125, 25.71259307861328, 27.183395385742188, 55.64649963378906, 14.298149108886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000552.npy"}
|
|
{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 30.661273956298828, "std": 25.554927825927734, "min": -13.285682678222656, "p10": 1.5409591674804697, "median": 26.372347831726074, "p90": 68.4598976135254, "max": 102.76478576660156, "pos_frac": 0.921875, "sample": [100.21258544921875, 58.67072296142578, 25.77642822265625, 25.932281494140625, 27.136383056640625, 6.418663024902344, 18.314422607421875, 0.6119613647460938, 24.500661849975586, 14.835931777954102, 19.592514038085938, 17.15515899658203, 38.43480682373047, 19.76523208618164, -2.5882720947265625, 28.14405059814453, 7.165904998779297, 8.69366455078125, 65.21720123291016, -6.686716079711914, 13.718994140625, 44.174713134765625, 36.721588134765625, 5.242059707641602, 21.28614044189453, 44.63447570800781, 90.29873657226562, 9.549369812011719, 27.091697692871094, 59.35613250732422, -5.6136627197265625, 40.114261627197266, 70.65791320800781, -3.7234344482421875, 64.18419647216797, 20.197113037109375, 36.413726806640625, 30.13311767578125, 40.99774169921875, 1.1420135498046875, 33.550201416015625, 30.433555603027344, 26.812414169311523, 20.690460205078125, 2.471832275390625, 3.0839767456054688, 23.619354248046875, 11.104728698730469, 69.84962463378906, 36.12556076049805, 73.82837677001953, 18.01651382446289, 41.2847900390625, 47.41693115234375, 73.14669799804688, 102.76478576660156, 39.44098663330078, 54.646240234375, 29.34864044189453, 20.916183471679688, -13.285682678222656, 12.806652069091797, 21.19249725341797, 39.17579650878906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000553.npy"}
|
|
{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 41.98722839355469, "std": 31.148805618286133, "min": -36.22331237792969, "p10": -0.13457412719726558, "median": 43.11760330200195, "p90": 82.4868522644043, "max": 103.73165893554688, "pos_frac": 0.875, "sample": [20.703109741210938, 30.831405639648438, 36.40701675415039, 5.317108154296875, 74.14825439453125, 74.784912109375, 82.93196105957031, 86.6209945678711, 32.428741455078125, 32.004844665527344, 70.35163879394531, 31.581283569335938, 65.78009033203125, 57.001129150390625, 11.333961486816406, 53.414794921875, 42.18426513671875, -2.8727264404296875, 53.456031799316406, 46.62933349609375, 30.15459632873535, -8.652231216430664, 17.56303596496582, 22.51251983642578, 25.268600463867188, -0.15244293212890625, 59.720314025878906, 44.613677978515625, -6.430335998535156, 81.4482650756836, 61.03160858154297, 20.098480224609375, 68.154541015625, 35.12202453613281, 62.48811721801758, 27.39521026611328, 90.35488891601562, 37.52621078491211, 93.73084259033203, -6.736324310302734, 56.21461486816406, 14.796836853027344, 50.235145568847656, 71.48329162597656, 99.49235534667969, 19.783283233642578, 61.99394226074219, 24.128128051757812, 61.191951751708984, 56.58355712890625, 3.905364990234375, 67.09500122070312, 44.050941467285156, -36.22331237792969, 41.01123809814453, -0.0928802490234375, -10.894561767578125, 15.957441329956055, 72.55169677734375, 52.31431579589844, 103.73165893554688, 100.9422378540039, 44.57886505126953, 12.101879119873047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000554.npy"}
|
|
{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 34.768707275390625, "std": 28.32597541809082, "min": -18.03848648071289, "p10": -3.336881256103511, "median": 34.6026496887207, "p90": 72.52934036254884, "max": 112.2755126953125, "pos_frac": 0.890625, "sample": [37.69239807128906, 34.86283874511719, 42.49775314331055, 48.868255615234375, 34.460540771484375, 83.37469482421875, 29.69775390625, 28.820762634277344, -15.825305938720703, 17.775283813476562, -8.77154541015625, 74.0626449584961, 36.362213134765625, 54.18617248535156, -11.580612182617188, 5.640161514282227, 10.096580505371094, 15.479873657226562, 112.2755126953125, 93.07534790039062, 20.450668334960938, 95.58177185058594, 43.620086669921875, 68.30618286132812, 37.21397399902344, 12.726730346679688, 50.850791931152344, 30.386741638183594, 52.298255920410156, -8.041748046875, 20.034683227539062, 47.1373291015625, 34.74475860595703, 42.11058044433594, 12.778533935546875, 39.18642807006836, 36.67954635620117, 15.232961654663086, 32.04948425292969, 17.51348876953125, -18.03848648071289, 23.235530853271484, 50.11761474609375, 68.95162963867188, 45.414405822753906, -7.587642669677734, 75.66506958007812, 36.169708251953125, 43.74749755859375, -5.232563018798828, 67.50123596191406, 25.6229248046875, 54.94706726074219, 26.094072341918945, 12.633014678955078, 24.162931442260742, 98.40164184570312, 7.431755065917969, 46.476402282714844, 31.725112915039062, 52.63922882080078, 32.646820068359375, 1.0863761901855469, 7.473300933837891], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000555.npy"}
|
|
{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 30.53741455078125, "std": 27.87470054626465, "min": -15.957328796386719, "p10": -3.0588584899902327, "median": 27.80112075805664, "p90": 65.71395874023439, "max": 102.3050537109375, "pos_frac": 0.84375, "sample": [67.410888671875, 1.8830184936523438, 23.707847595214844, 22.41180419921875, 30.500137329101562, 16.857032775878906, 51.495582580566406, 26.57550811767578, -9.828033447265625, 42.07044982910156, -8.137689590454102, 50.776493072509766, 61.75445556640625, 29.988868713378906, 22.50316619873047, 39.40790939331055, 87.63065338134766, 41.17230224609375, 34.6246452331543, 14.298702239990234, 11.315620422363281, 41.36164093017578, -5.4460296630859375, 9.346988677978516, -15.957328796386719, 85.10359191894531, 29.0267333984375, 17.525001525878906, 80.20649719238281, -10.616592407226562, 25.44683837890625, 10.936731338500977, 2.6720848083496094, -3.705841064453125, 57.79368591308594, 2.646677017211914, 25.89588165283203, 29.509078979492188, 36.625343322753906, -0.43253517150878906, 0.15848541259765625, 26.541351318359375, 61.046085357666016, 46.49272918701172, 9.443702697753906, 4.398468017578125, 83.69059753417969, 45.512451171875, 53.641632080078125, -1.5492324829101562, 44.142662048339844, 12.288101196289062, 45.091957092285156, 43.172515869140625, 93.96205139160156, 15.484054565429688, 53.71897888183594, 102.3050537109375, 18.327238082885742, 48.211143493652344, 43.64622116088867, -0.3391532897949219, -7.449424743652344, 36.099117279052734], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000556.npy"}
|
|
{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 30.403928756713867, "std": 26.89265251159668, "min": -19.168643951416016, "p10": 3.0156908035278325, "median": 25.22757911682129, "p90": 69.8290298461914, "max": 108.72360229492188, "pos_frac": 0.90625, "sample": [29.208831787109375, 48.58769226074219, 24.137428283691406, 35.22798156738281, -3.763864517211914, 54.30537414550781, 3.703125, 39.69506072998047, 20.991409301757812, 14.630805969238281, 31.465606689453125, 20.992050170898438, 24.69390106201172, 28.480682373046875, 5.59466552734375, 27.36724853515625, 84.31671142578125, 14.366378784179688, 46.4764518737793, 14.70684814453125, 20.616134643554688, 108.72360229492188, 72.36575317382812, 63.30774688720703, 39.68928527832031, 66.71099853515625, 5.293975830078125, 23.24671173095703, -11.838239669799805, 69.21028137207031, 17.744495391845703, 41.62562561035156, 86.9784927368164, 23.691051483154297, 2.7571372985839844, 6.4986572265625, 10.455066680908203, 5.9170684814453125, 34.220428466796875, 11.155017852783203, 8.524543762207031, -0.2368297576904297, 29.209604263305664, 24.222126007080078, 3.6189823150634766, -0.412017822265625, 50.72761154174805, 74.83389282226562, 24.4224853515625, 40.88689422607422, 26.143680572509766, 92.21576690673828, 70.09420776367188, 40.339088439941406, 37.62584686279297, -2.8434600830078125, -19.168643951416016, 26.154136657714844, 11.049270629882812, 25.76125717163086, 3.8543014526367188, 66.11769104003906, 42.59381866455078, 6.5634613037109375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000557.npy"}
|
|
{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 40.984100341796875, "std": 31.049041748046875, "min": -11.951118469238281, "p10": 10.279118728637696, "median": 32.681020736694336, "p90": 89.30910644531251, "max": 110.35079193115234, "pos_frac": 0.953125, "sample": [26.48309326171875, 6.7919769287109375, 21.539649963378906, 39.179840087890625, 49.846893310546875, 27.507625579833984, 39.68718719482422, 32.366783142089844, 12.77804946899414, -11.951118469238281, 10.700958251953125, 41.28923034667969, 100.65394592285156, 82.26788330078125, 43.211055755615234, 25.64223861694336, 61.027854919433594, 55.076438903808594, 100.06498718261719, 44.201812744140625, 30.881338119506836, 29.969215393066406, 47.67083740234375, 18.945026397705078, 24.06793975830078, 24.02874755859375, 69.04409790039062, 10.503372192382812, 28.57514190673828, 19.190223693847656, 17.444374084472656, 38.0350341796875, 27.500030517578125, 90.62168884277344, 86.24641418457031, 61.56334686279297, 33.1695556640625, 27.440052032470703, 11.329017639160156, 24.004764556884766, 11.54156494140625, 4.024812698364258, 32.99525833129883, 68.28216552734375, 15.646186828613281, -1.5164661407470703, 2.750823974609375, 109.87335205078125, 18.469064712524414, 14.194604873657227, 78.689453125, 100.68074035644531, 66.31121826171875, 67.77743530273438, 83.40478515625, 13.240676879882812, 33.77336502075195, 10.18301010131836, 52.18822479248047, -11.160125732421875, 39.992218017578125, 93.59757995605469, 110.35079193115234, 79.09500122070312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000558.npy"}
|
|
{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 33.14677429199219, "std": 27.877229690551758, "min": -11.05831527709961, "p10": -2.734062385559081, "median": 27.875383377075195, "p90": 76.70242919921877, "max": 104.95195007324219, "pos_frac": 0.875, "sample": [40.46150207519531, 38.307464599609375, 78.70158386230469, 36.13981628417969, 22.25867462158203, 14.564926147460938, 26.090185165405273, 9.268241882324219, 16.302072525024414, 63.18156433105469, 16.05091094970703, 33.52427673339844, 23.998008728027344, 45.4176025390625, 45.84367370605469, 72.03773498535156, 84.89047241210938, 46.581214904785156, 17.90264892578125, 9.7843017578125, 104.95195007324219, 9.339056015014648, -1.6751155853271484, 49.128997802734375, 10.36029052734375, -3.187896728515625, -8.0650634765625, 29.245208740234375, 11.836627960205078, 40.17008972167969, 13.834259033203125, -11.05831527709961, -10.684928894042969, 20.55776596069336, 34.79069519042969, 66.14169311523438, -3.7284812927246094, 7.519458770751953, 48.871395111083984, 22.008167266845703, 46.36509704589844, -3.701751708984375, 5.461700439453125, 29.67675018310547, 21.553680419921875, 26.505558013916016, 9.50595474243164, 56.61417770385742, 81.60838317871094, -3.834442138671875, 46.66333770751953, 60.95709228515625, 44.66471862792969, 16.608970642089844, 49.25276184082031, 82.99403381347656, 14.574260711669922, 86.52752685546875, 45.163970947265625, 38.63023376464844, 58.33393096923828, 12.500808715820312, 23.682720184326172, 99.4212646484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000559.npy"}
|
|
{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 28.90864372253418, "std": 24.986602783203125, "min": -15.746879577636719, "p10": 2.1929201126098636, "median": 21.70637035369873, "p90": 62.6974739074707, "max": 104.5703125, "pos_frac": 0.921875, "sample": [58.72618103027344, 20.550323486328125, 53.09508514404297, 36.30606460571289, 7.677507400512695, 36.6307373046875, -4.0427703857421875, -15.746879577636719, 18.749801635742188, 59.592430114746094, 25.578765869140625, 33.516754150390625, 52.54273986816406, 19.981842041015625, 74.92829895019531, 11.26580810546875, 104.5703125, 74.31087493896484, 18.01250457763672, 15.160835266113281, 10.742660522460938, 16.872413635253906, 41.49916076660156, 77.21817016601562, 62.61683654785156, 21.775108337402344, 11.621135711669922, 22.231918334960938, 29.193695068359375, 21.398101806640625, 52.9918327331543, 10.625541687011719, 14.451873779296875, 42.20458221435547, 54.44920349121094, -14.410148620605469, 18.696334838867188, 90.72933959960938, 22.495758056640625, 65.96050262451172, 19.925308227539062, -4.6580810546875, 26.21987533569336, 5.036956787109375, 62.732032775878906, 16.288040161132812, 1.1301021575927734, 37.01140213012695, 3.8003787994384766, 42.308265686035156, 27.47366714477539, 2.700927734375, 18.947166442871094, 38.137725830078125, -0.4747161865234375, 11.174354553222656, 17.763202667236328, 28.847671508789062, 37.447174072265625, 42.9005126953125, 1.9752025604248047, 5.472221374511719, 21.637632369995117, 11.5849609375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000560.npy"}
|
|
{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 29.287330627441406, "std": 26.283649444580078, "min": -18.023117065429688, "p10": -0.26044025421142464, "median": 22.366764068603516, "p90": 68.35388946533203, "max": 99.56396484375, "pos_frac": 0.890625, "sample": [63.917686462402344, 21.575592041015625, 11.38726806640625, -10.536104202270508, -2.0639877319335938, 31.593948364257812, 71.20987701416016, 21.887100219726562, 14.414804458618164, 69.87242889404297, 17.642833709716797, 36.590248107910156, 48.3497314453125, 67.83953857421875, 16.546642303466797, 13.940155029296875, 42.44325637817383, 47.37397003173828, 81.18209075927734, 33.48499298095703, 53.83742904663086, 1.5448455810546875, 39.46882629394531, 57.34165954589844, 24.36638641357422, 18.6976318359375, 78.51678466796875, 44.17609405517578, 21.706880569458008, 40.03291320800781, 33.07218933105469, 95.4078369140625, 7.307670593261719, 56.51375198364258, 55.280914306640625, 22.84642791748047, -3.931304931640625, 2.2464599609375, 13.03264045715332, 11.9637451171875, 12.978944778442383, 0.8418292999267578, 4.280479431152344, 44.169036865234375, 6.44427490234375, -4.6251220703125, 16.603788375854492, 99.56396484375, 16.698881149291992, 35.97789001464844, -15.501480102539062, 16.555999755859375, 34.01835250854492, -0.7328414916992188, 33.04049301147461, 8.022659301757812, 18.31493377685547, 33.880088806152344, 34.18083190917969, 23.138519287109375, 68.57432556152344, -18.023117065429688, 21.621307373046875, 12.283210754394531], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000561.npy"}
|
|
{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 37.77778244018555, "std": 26.89278793334961, "min": -10.914375305175781, "p10": 11.000024795532227, "median": 31.29846954345703, "p90": 75.56397247314453, "max": 104.27523803710938, "pos_frac": 0.96875, "sample": [26.26006317138672, 102.72311401367188, 35.77274703979492, 37.34373474121094, 28.015031814575195, 16.523155212402344, 23.08068084716797, 33.788536071777344, 44.08665466308594, 30.35607147216797, 14.845500946044922, 74.59663391113281, 1.6202945709228516, 66.90348815917969, 71.91777038574219, 11.722915649414062, 33.70928955078125, 62.13871765136719, 7.96479606628418, 38.56317138671875, 18.674957275390625, 35.26056671142578, 52.594573974609375, 20.326818466186523, 10.351287841796875, 63.90290069580078, 104.27523803710938, 21.668548583984375, 92.71736145019531, 12.7921142578125, -0.3878288269042969, 16.84815216064453, 24.844717025756836, 54.454803466796875, 19.475326538085938, 102.24946594238281, 10.78057861328125, 41.93115234375, 51.3638916015625, 51.536895751953125, -10.914375305175781, 31.590621948242188, 72.69153594970703, 62.38972473144531, 11.512065887451172, 49.16350173950195, 25.763572692871094, 81.8402328491211, 31.006317138671875, 75.97854614257812, 17.368976593017578, 48.89320373535156, 45.332908630371094, 17.13327407836914, 9.781639099121094, 42.58137512207031, 16.14449691772461, 16.875539779663086, 19.472017288208008, 19.019657135009766, 18.064685821533203, 36.843109130859375, 87.15748596191406, 24.493972778320312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000562.npy"}
|
|
{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 30.286453247070312, "std": 23.748916625976562, "min": -14.263046264648438, "p10": 1.9981216430664075, "median": 28.042617797851562, "p90": 58.233010864257814, "max": 97.16363525390625, "pos_frac": 0.921875, "sample": [10.33302116394043, 27.020095825195312, 7.317840576171875, 17.665298461914062, 47.005863189697266, -12.655563354492188, 76.30087280273438, 20.52227783203125, 22.825416564941406, 45.27464294433594, 15.804405212402344, 38.43141555786133, 12.35335922241211, 57.374664306640625, -0.12212371826171875, 50.886138916015625, 27.909255981445312, 74.12075805664062, 28.175979614257812, 42.33558654785156, 53.267486572265625, -0.5662765502929688, 12.071380615234375, 6.3096466064453125, 0.9320220947265625, -5.865711212158203, 38.28038024902344, 58.409423828125, 54.7802734375, 40.94329833984375, 6.951873779296875, 61.44956970214844, -14.263046264648438, 36.0256233215332, 16.790884017944336, 50.51115417480469, 16.711299896240234, 3.2711868286132812, 46.35157012939453, 36.52632141113281, 67.49828338623047, 48.69268798828125, 57.821380615234375, 37.99201202392578, 85.75653076171875, 43.17840576171875, 20.239952087402344, 97.16363525390625, 21.43695068359375, 35.856964111328125, 6.298969268798828, 32.360511779785156, 17.07160186767578, 11.758108139038086, 31.109329223632812, 10.980117797851562, 31.86974334716797, 1.4525222778320312, 34.769561767578125, 11.396203994750977, 52.94524383544922, 15.18545913696289, 21.768638610839844, 15.962610244750977], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000563.npy"}
|
|
{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 32.34123992919922, "std": 22.849916458129883, "min": -15.294261932373047, "p10": 3.799764633178712, "median": 34.44287872314453, "p90": 62.14091453552246, "max": 78.28231811523438, "pos_frac": 0.9375, "sample": [33.74281311035156, 21.402175903320312, 69.60060119628906, 2.6113204956054688, 62.14780044555664, 55.68054962158203, 62.124847412109375, 11.122024536132812, 35.1429443359375, 1.6748199462890625, 36.242950439453125, 31.60706329345703, 36.27971649169922, 41.942169189453125, 60.80027770996094, 9.160675048828125, 33.2357292175293, 15.833625793457031, -15.294261932373047, 14.507797241210938, 43.10481262207031, 5.1782379150390625, 11.671302795410156, -11.300779342651367, 4.6247100830078125, 23.083499908447266, 11.754119873046875, 46.291595458984375, 22.315155029296875, 70.04458618164062, 37.17291259765625, 22.509979248046875, 19.919876098632812, -3.4287338256835938, 42.272911071777344, 38.431480407714844, 49.600616455078125, 48.101219177246094, 43.019683837890625, 3.446216583251953, 54.129547119140625, 38.052947998046875, 13.3992919921875, 10.627738952636719, 36.822235107421875, 10.596435546875, 15.514442443847656, 17.8322696685791, 78.28231811523438, 65.4696044921875, 12.609519958496094, 27.79796600341797, 56.1368408203125, -3.0789222717285156, 56.28315353393555, 15.933006286621094, 51.1096076965332, 76.92648315429688, 50.82176208496094, 36.692840576171875, 64.56988525390625, 55.005226135253906, 22.90078353881836, 58.025291442871094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000564.npy"}
|
|
{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 35.16046905517578, "std": 30.857439041137695, "min": -15.841583251953125, "p10": 2.890913200378418, "median": 29.055843353271484, "p90": 72.01796112060549, "max": 148.0425567626953, "pos_frac": 0.953125, "sample": [-1.456634521484375, 148.0425567626953, 36.996002197265625, 9.814571380615234, 10.347366333007812, 65.78953552246094, 84.79447174072266, 28.36602020263672, 24.04505157470703, 17.788116455078125, 29.485931396484375, 59.98567199707031, 17.21771240234375, 49.285675048828125, 10.734947204589844, 12.871685028076172, -3.5054492950439453, 29.92705535888672, 14.533477783203125, 56.10789489746094, 28.625755310058594, 37.01294708251953, 55.511573791503906, 88.763916015625, 17.966415405273438, 17.452163696289062, 54.45840072631836, 42.411521911621094, -15.841583251953125, 40.10393524169922, 30.660064697265625, 23.490591049194336, 33.35930252075195, 46.02055358886719, 3.8183822631835938, 3.20916748046875, 62.887779235839844, 61.472084045410156, 4.180580139160156, 24.196517944335938, 19.472732543945312, 46.686492919921875, 21.021255493164062, 61.02940368652344, 46.47908020019531, 129.19125366210938, 8.08210563659668, 24.425613403320312, 80.3512954711914, 8.240753173828125, 23.659744262695312, 1.5721359252929688, 47.07344055175781, 46.64607620239258, 74.68728637695312, 21.746002197265625, 60.05046081542969, 0.7341785430908203, 33.37558364868164, 3.8460216522216797, 2.754518508911133, 2.712919235229492, 33.31695556640625, 92.18280792236328], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000565.npy"}
|
|
{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 33.91465377807617, "std": 30.285097122192383, "min": -19.3923282623291, "p10": 0.17043647766113373, "median": 31.251373291015625, "p90": 72.97052612304688, "max": 112.04925537109375, "pos_frac": 0.890625, "sample": [1.592529296875, 8.058753967285156, 32.70122528076172, 31.716201782226562, 34.4400634765625, -2.352996826171875, 28.492660522460938, 30.786544799804688, -1.2213191986083984, 72.48995971679688, 39.10618591308594, 18.7735595703125, 5.126922607421875, 38.22142791748047, 47.79290771484375, 57.49523162841797, 50.76878356933594, 1.0672340393066406, -0.8996677398681641, 15.0574951171875, 32.27382278442383, 83.05610656738281, 56.489013671875, 8.4373779296875, 48.089569091796875, -19.3923282623291, 53.075714111328125, 45.31653594970703, 15.694513320922852, 47.013671875, 21.757835388183594, 112.04925537109375, 67.98139190673828, 11.605331420898438, 96.64950561523438, 9.355354309082031, -14.286750793457031, 63.90740966796875, 29.700424194335938, -1.7264251708984375, 10.823753356933594, 60.00971984863281, 3.0768051147460938, 27.898033142089844, 32.95323181152344, 80.07038879394531, 73.11791229248047, 35.89250946044922, 6.9878692626953125, 1.2334327697753906, 7.218557357788086, 111.11724853515625, 17.269882202148438, 72.62662506103516, 4.11631965637207, 23.40314483642578, 59.01792907714844, 34.52549743652344, 90.40155029296875, -0.21390533447265625, 10.714519500732422, 26.39453125, 44.70732116699219, 60.91400146484375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000566.npy"}
|
|
{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 32.32299041748047, "std": 28.5046329498291, "min": -38.971954345703125, "p10": -0.9530776977539058, "median": 32.892391204833984, "p90": 72.72854919433594, "max": 85.36883544921875, "pos_frac": 0.875, "sample": [5.056243896484375, 40.84173583984375, 54.87506866455078, 69.15830993652344, 13.857376098632812, 38.526641845703125, 11.548408508300781, 26.330970764160156, 63.512184143066406, 43.420738220214844, 50.614295959472656, -4.249977111816406, 66.29872131347656, 32.66712951660156, 76.2315444946289, 47.9208984375, 64.16349792480469, 75.67893981933594, 73.29885864257812, 5.663646697998047, 60.68315124511719, 51.46897888183594, -16.345823287963867, 65.20820617675781, 48.019622802734375, 33.117652893066406, 21.482074737548828, 29.60479736328125, 6.967443466186523, 45.20686340332031, 30.338388442993164, 60.19920349121094, 12.902427673339844, -2.9912033081054688, 38.08071517944336, 2.6700439453125, 33.72812271118164, 82.16459655761719, 85.36883544921875, 79.91029357910156, 20.093154907226562, -38.971954345703125, 18.963607788085938, 4.4123077392578125, 22.852561950683594, 43.048709869384766, 10.7891845703125, 40.26860046386719, 11.970169067382812, -10.011877059936523, -12.9305419921875, 74.62667846679688, 0.8571090698242188, 71.3978271484375, 4.75433349609375, 33.48896026611328, -1.1356201171875, 23.322280883789062, -0.5271453857421875, 34.151546478271484, 51.921470642089844, 8.768863677978516, 1.5780982971191406, 31.783401489257812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000567.npy"}
|
|
{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 37.909576416015625, "std": 27.431215286254883, "min": -16.814102172851562, "p10": 2.084158897399906, "median": 37.270748138427734, "p90": 76.54836807250979, "max": 113.576904296875, "pos_frac": 0.90625, "sample": [37.84190368652344, 29.217655181884766, 85.31636810302734, 21.83316421508789, 22.682579040527344, 71.18658447265625, 57.99766540527344, 31.010154724121094, 36.76548767089844, -16.814102172851562, 9.200668334960938, 0.6134357452392578, 81.41046142578125, 83.9456787109375, -1.0751419067382812, 60.85934829711914, 30.06024932861328, 7.362712860107422, 11.727985382080078, 40.86277770996094, 32.432167053222656, 34.839359283447266, 88.02679443359375, 37.77600860595703, 43.41477966308594, 57.38532257080078, 12.215858459472656, 59.72673034667969, 9.684593200683594, 17.11772918701172, 38.716064453125, 61.76588439941406, 48.63213348388672, 59.23768615722656, 57.95121765136719, 34.114707946777344, 43.53224182128906, 30.345985412597656, 39.25969314575195, 78.84627532958984, 113.576904296875, -4.146781921386719, 29.864398956298828, 46.429203033447266, 31.663040161132812, 29.876129150390625, -6.477481842041016, 14.80270767211914, 13.611095428466797, 102.47779083251953, 26.142946243286133, 60.543006896972656, 5.515846252441406, 46.84766387939453, 41.65803527832031, 52.126190185546875, -13.881248474121094, 39.42771911621094, 45.56782150268555, 51.236114501953125, 28.020418167114258, 60.22119140625, -0.8125686645507812, 24.89589500427246], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000568.npy"}
|
|
{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 30.12950897216797, "std": 27.329198837280273, "min": -19.886856079101562, "p10": -1.7657257080078113, "median": 26.042662620544434, "p90": 71.48460159301759, "max": 102.46028137207031, "pos_frac": 0.859375, "sample": [5.565757751464844, 15.868202209472656, 102.46028137207031, 38.70249938964844, 22.782562255859375, -0.598724365234375, 8.981918334960938, 72.91715240478516, 2.0687084197998047, 0.4139556884765625, 34.709747314453125, 96.97822570800781, 28.457059860229492, 26.61583709716797, 43.3585090637207, -9.919235229492188, 29.71387481689453, 68.14198303222656, 18.011661529541016, 16.1675968170166, -19.886856079101562, 39.05029296875, 22.870784759521484, 16.093570709228516, 13.154897689819336, 56.35490417480469, 89.67790222167969, 11.272872924804688, -9.090301513671875, -2.3919830322265625, 10.270345687866211, 80.04901123046875, 46.02326202392578, 12.615667343139648, 29.247535705566406, 53.11644744873047, -2.91790771484375, 45.31118392944336, 61.45027160644531, -0.35400390625, 75.19021606445312, 7.097663879394531, -7.696542739868164, 52.46397399902344, 16.66571044921875, 25.4694881439209, 41.77040100097656, 36.8797607421875, 11.474533081054688, 16.09320068359375, 19.41962432861328, 73.60238647460938, 16.24506378173828, 44.07482147216797, 37.618019104003906, -2.265869140625, 12.463264465332031, 41.79172897338867, 51.53117370605469, 31.70184326171875, 27.182510375976562, 23.833274841308594, 65.30543518066406, 37.06158447265625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000569.npy"}
|
|
{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 35.00107955932617, "std": 25.37564468383789, "min": -14.023933410644531, "p10": 4.037163543701173, "median": 30.60845375061035, "p90": 69.01341857910157, "max": 107.7603759765625, "pos_frac": 0.90625, "sample": [4.510215759277344, 59.848548889160156, 44.26353073120117, 69.76624298095703, 20.904197692871094, 37.732303619384766, 56.03830337524414, 55.645355224609375, 85.51156616210938, 67.25682830810547, 46.235107421875, 100.43549346923828, 74.76380157470703, 29.087173461914062, 16.644243240356445, 20.623294830322266, 107.7603759765625, 29.42115592956543, 61.81293487548828, 11.822731018066406, 3.8344268798828125, -7.781227111816406, 35.45221710205078, 20.654342651367188, 7.62640380859375, 60.21278381347656, 30.382129669189453, 30.83477783203125, -14.023933410644531, 37.12098693847656, 31.43022918701172, 37.50306701660156, 20.494300842285156, 21.921632766723633, 46.37518310546875, 71.971923828125, -0.4190101623535156, 65.30500793457031, 8.271598815917969, 40.37110900878906, 17.772485733032227, 29.431507110595703, -0.012420654296875, 27.450599670410156, 53.744110107421875, 26.85224151611328, 33.11650848388672, 46.024070739746094, 22.12095832824707, 60.9608154296875, -0.3234138488769531, 22.78466796875, 70.3511734008789, 42.61961364746094, 34.01251220703125, 26.683273315429688, 15.231620788574219, 26.2535400390625, 15.849472045898438, -3.301074981689453, 12.70480728149414, 48.8546142578125, 23.866100311279297, 39.39987564086914], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000570.npy"}
|
|
{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 34.58122634887695, "std": 27.801191329956055, "min": -13.674713134765625, "p10": 3.8029483795166046, "median": 29.269794464111328, "p90": 71.80464401245118, "max": 105.92010498046875, "pos_frac": 0.90625, "sample": [53.71867370605469, 38.31494903564453, 24.542186737060547, 15.65155029296875, -5.569719314575195, 43.87464904785156, 8.578075408935547, 39.56095504760742, 36.88017272949219, 46.6158447265625, 100.42495727539062, 22.238628387451172, 52.883583068847656, 11.747550964355469, 26.638774871826172, 20.498048782348633, 66.92218017578125, 105.92010498046875, 20.113128662109375, 51.76775360107422, 56.06555938720703, 100.87132263183594, 43.94084167480469, -2.554962158203125, 6.805091857910156, 30.557844161987305, 42.45030975341797, 43.537254333496094, 9.501375198364258, 37.28715515136719, 25.829702377319336, 13.438323974609375, 85.12600708007812, 70.20263671875, 95.72666931152344, 18.22745132446289, 14.347503662109375, 28.654327392578125, 61.21580123901367, 66.05619049072266, 40.0944938659668, 15.351062774658203, 72.49121856689453, 24.438194274902344, 2.516315460205078, 38.64873504638672, 77.38127136230469, 48.132415771484375, -2.8586463928222656, 25.64197540283203, 10.796939849853516, -3.184783935546875, 42.05565643310547, 16.45850372314453, 11.62939453125, 29.88526153564453, 21.527423858642578, 7.330753326416016, -13.674713134765625, 37.59329605102539, 12.846630096435547, 54.776634216308594, 21.921920776367188, -7.2098388671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000571.npy"}
|
|
{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 32.25321960449219, "std": 31.98067283630371, "min": -42.118682861328125, "p10": -3.7617568969726545, "median": 29.165355682373047, "p90": 75.708935546875, "max": 122.31629943847656, "pos_frac": 0.84375, "sample": [1.7519302368164062, -10.956329345703125, 18.346221923828125, 50.95433044433594, 76.92770385742188, 35.16020965576172, -4.6256256103515625, 8.286155700683594, -7.094387054443359, 6.063240051269531, 10.951292037963867, 3.0989227294921875, 53.72795867919922, 13.041667938232422, 72.49177551269531, 49.30182647705078, 38.962921142578125, 83.33039093017578, 12.654678344726562, 51.88314437866211, 3.8181838989257812, 27.30145263671875, 41.48974609375, 54.83966827392578, 16.80046844482422, -1.1311397552490234, -6.713161468505859, 9.335746765136719, -10.9730224609375, 58.20817565917969, 44.22862243652344, 16.640296936035156, 51.897884368896484, 21.423202514648438, 4.319377899169922, 122.31629943847656, 52.69715118408203, 28.92089080810547, 48.484039306640625, 39.44709777832031, 66.0565185546875, 26.63994598388672, -42.118682861328125, 58.03620147705078, -0.74505615234375, -41.125762939453125, 91.07815551757812, 24.242576599121094, 26.612394332885742, -1.746063232421875, 75.93055725097656, 14.350914001464844, 20.91588592529297, 53.35247039794922, 16.992511749267578, 33.28792190551758, 75.19181823730469, 91.34498596191406, 34.1065673828125, 38.520233154296875, 37.937294006347656, 79.60734558105469, 68.71839904785156, 29.409820556640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000572.npy"}
|
|
{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 34.536949157714844, "std": 28.615467071533203, "min": -20.89297103881836, "p10": 2.5046955108642592, "median": 29.955368041992188, "p90": 73.86744689941406, "max": 107.09217071533203, "pos_frac": 0.921875, "sample": [53.073509216308594, 14.00285530090332, 33.99073028564453, 19.233062744140625, 27.077674865722656, 54.04823303222656, 13.376174926757812, 25.998146057128906, 72.70304107666016, 64.93045043945312, 15.609420776367188, 44.80467987060547, 0.6434249877929688, 107.09217071533203, 45.014739990234375, 84.39158630371094, 27.334693908691406, -20.89297103881836, 7.161041259765625, 15.068328857421875, 37.920494079589844, 57.64421844482422, 11.949851989746094, 61.4671630859375, -6.830513000488281, 75.80953979492188, 93.88723754882812, 72.64151000976562, 31.487041473388672, 52.848876953125, -11.810684204101562, 12.33087158203125, 9.112201690673828, 45.7841796875, 31.776023864746094, 11.055122375488281, 53.850074768066406, -9.861648559570312, 42.83153533935547, -5.785957336425781, 91.36219787597656, 20.883934020996094, 60.803955078125, 3.968341827392578, 5.3654632568359375, 8.214475631713867, 40.408905029296875, 89.56549072265625, 29.970443725585938, 20.970314025878906, 46.33074951171875, 28.69597625732422, 3.9993896484375, 19.250869750976562, 25.971206665039062, 1.8774185180664062, 52.475738525390625, 74.3664779663086, 65.75013732910156, 29.940292358398438, 28.28973388671875, 34.09251403808594, 12.597221374511719, 42.44544219970703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000573.npy"}
|
|
{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 34.14720916748047, "std": 28.386531829833984, "min": -31.84403419494629, "p10": 5.22524528503418, "median": 27.535313606262207, "p90": 77.24646072387696, "max": 92.2944107055664, "pos_frac": 0.953125, "sample": [12.281944274902344, 47.953033447265625, 12.498611450195312, 23.508033752441406, 18.256134033203125, 11.301193237304688, -31.84403419494629, 75.58221435546875, 38.54505157470703, 54.36053466796875, 39.45874786376953, 57.44956970214844, 69.03972625732422, 61.719383239746094, 33.11177062988281, 16.786026000976562, 10.448053359985352, 22.963943481445312, 19.572601318359375, 91.05045318603516, 5.4846343994140625, 7.6846923828125, 13.15496826171875, 44.7767333984375, 22.13885498046875, 68.73348236083984, 34.93993377685547, 1.30950927734375, 14.616024017333984, 16.107799530029297, 36.14459228515625, 8.572032928466797, 37.92041015625, 92.2944107055664, 5.598182678222656, 21.900480270385742, -23.418128967285156, 69.218505859375, 23.815628051757812, 29.238487243652344, 89.50398254394531, 10.590972900390625, 83.14996337890625, 51.109275817871094, -6.15960693359375, 13.51727294921875, 3.9213485717773438, 23.747180938720703, 36.69096374511719, 23.55670166015625, 44.97303009033203, 5.114078521728516, 1.6260242462158203, 85.31735229492188, 45.48577880859375, 32.02935028076172, 43.811798095703125, 87.66873168945312, 41.89586639404297, 59.10459899902344, 77.95970916748047, 67.8709487915039, 22.82963752746582, 25.83213996887207], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000574.npy"}
|
|
{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 36.32011795043945, "std": 31.866594314575195, "min": -36.50041198730469, "p10": -3.2526168823242183, "median": 32.09042930603027, "p90": 76.4083480834961, "max": 106.14752197265625, "pos_frac": 0.828125, "sample": [18.72315216064453, 51.72999572753906, 91.1700210571289, -3.4823455810546875, 57.0179443359375, 29.291961669921875, 51.31146240234375, 68.2662124633789, 39.066139221191406, 35.505577087402344, 23.453344345092773, 29.6309814453125, -10.351409912109375, 29.350021362304688, 47.24787902832031, 64.6013412475586, 14.984039306640625, 84.34615325927734, 32.42154312133789, 60.84223937988281, 52.27214050292969, 22.126678466796875, 75.26551818847656, 14.63775634765625, 26.68351173400879, -9.641242980957031, -15.997146606445312, 49.311519622802734, 13.045158386230469, 31.00405502319336, 24.483930587768555, 81.15792846679688, 33.41722869873047, 68.97946166992188, -2.716583251953125, -2.606964111328125, 69.07010650634766, -1.7978076934814453, 106.14752197265625, 43.73773193359375, 31.759315490722656, 3.6676864624023438, -27.05779266357422, 57.26873779296875, 84.97976684570312, -36.50041198730469, 61.919090270996094, 57.93175506591797, 31.328414916992188, 11.2294921875, 7.1225433349609375, 101.56460571289062, 57.901641845703125, 22.528533935546875, 73.2019271850586, 29.466100692749023, 63.38935852050781, 22.602821350097656, 76.89813232421875, -0.6434516906738281, -18.17017364501953, 37.576316833496094, 25.58861541748047, 55.227806091308594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000575.npy"}
|
|
{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 34.768131256103516, "std": 30.357736587524414, "min": -30.184276580810547, "p10": 6.310462188720703, "median": 27.14690399169922, "p90": 80.69935455322268, "max": 121.17652130126953, "pos_frac": 0.953125, "sample": [51.260780334472656, 92.17681884765625, -30.184276580810547, 21.310161590576172, 0.8250656127929688, 6.435634613037109, 19.497520446777344, 54.0133056640625, 34.111148834228516, 22.102386474609375, 37.71288299560547, 88.82255554199219, 19.04845428466797, 26.439468383789062, 52.8826904296875, 40.2076416015625, 69.2947998046875, 17.534317016601562, 74.43633270263672, 9.280471801757812, 37.464202880859375, 43.15277099609375, 58.98612976074219, 93.11802673339844, 14.011524200439453, 94.60043334960938, 12.462520599365234, 9.999156951904297, 61.12139892578125, 56.098785400390625, -21.692115783691406, 27.854339599609375, 30.518585205078125, 6.256816864013672, 46.236541748046875, 83.38350677490234, 121.17652130126953, 15.532196044921875, 11.656583786010742, 51.652732849121094, 9.333541870117188, 35.53227996826172, 29.50529670715332, 36.43742370605469, 11.77392578125, 0.0980987548828125, 9.003082275390625, 24.13927459716797, -2.530059814453125, 20.29475975036621, 16.377342224121094, 99.35942077636719, 65.81587219238281, 65.6484375, 7.666751861572266, 1.7664852142333984, 23.748313903808594, 19.574920654296875, 33.42936706542969, 16.360626220703125, 37.22698211669922, 21.502639770507812, 14.439285278320312, 67.85749053955078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000576.npy"}
|
|
{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 38.08292007446289, "std": 28.71381950378418, "min": -25.922870635986328, "p10": 5.749527168273927, "median": 36.8105411529541, "p90": 72.37517013549805, "max": 109.6441650390625, "pos_frac": 0.90625, "sample": [7.666839599609375, 109.28489685058594, 38.42860412597656, 85.91873931884766, 9.286764144897461, 37.59251403808594, 36.29738235473633, 43.38507843017578, 39.86759948730469, 14.24322509765625, 69.3262939453125, 23.0321044921875, 20.719173431396484, 60.06378173828125, 32.99236297607422, 66.66358184814453, 109.6441650390625, 89.72285461425781, 37.323699951171875, 25.77279281616211, 17.48367691040039, 32.151817321777344, 8.550987243652344, 20.455902099609375, 70.74871826171875, 21.190784454345703, 12.17502212524414, 60.91028594970703, 71.11196899414062, 34.047813415527344, 51.363861083984375, 5.3868255615234375, 27.280202865600586, 38.12623596191406, 64.77940368652344, 37.43603515625, 72.91654205322266, 29.52768325805664, 30.53734588623047, -1.400360107421875, 6.595830917358398, 7.405364990234375, 41.29264831542969, 25.909774780273438, -2.411346435546875, -4.077579498291016, -2.4596939086914062, 48.50053024291992, 14.707130432128906, 99.79006958007812, 40.480735778808594, 45.34208679199219, 46.199615478515625, 15.790632247924805, 79.39199829101562, 70.64299011230469, 57.82637405395508, -0.4207611083984375, -25.922870635986328, 35.027889251708984, 42.104774475097656, 20.765907287597656, 51.87666320800781, 60.934783935546875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000577.npy"}
|
|
{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 35.280113220214844, "std": 27.42318344116211, "min": -13.554534912109375, "p10": 3.180938148498536, "median": 28.675315856933594, "p90": 79.34371719360352, "max": 95.07947540283203, "pos_frac": 0.921875, "sample": [67.2209701538086, 44.40078353881836, 38.894561767578125, 8.521923065185547, 57.494224548339844, 40.68440628051758, 88.086669921875, 15.2867431640625, 14.242542266845703, -11.32577133178711, 4.043787002563477, 78.9838638305664, 48.310028076171875, 25.447349548339844, 27.669219970703125, 22.99980926513672, 47.239227294921875, 10.509071350097656, 55.988037109375, 66.310791015625, 26.55780029296875, 45.40327453613281, 15.785463333129883, 8.802452087402344, -4.270771026611328, 2.811145782470703, 50.61630630493164, 22.077028274536133, 34.13687515258789, 19.97014617919922, 29.681411743164062, 14.446578979492188, 32.809349060058594, 86.62873840332031, 16.288070678710938, 84.10977172851562, 58.213539123535156, 6.165679931640625, -1.2089157104492188, 13.72909164428711, 39.73204803466797, 43.11566925048828, 53.45431900024414, 11.325328826904297, 77.6688461303711, 27.238677978515625, 13.529335021972656, 2.5403594970703125, 32.14317321777344, 24.072378158569336, 40.74182891845703, 84.82832336425781, 19.732643127441406, 85.23329162597656, 65.72550964355469, 18.489871978759766, -2.058441162109375, 79.49794006347656, 95.07947540283203, 57.1417236328125, 36.32989501953125, -13.554534912109375, 26.200714111328125, 25.957550048828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000578.npy"}
|
|
{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 30.629066467285156, "std": 23.335224151611328, "min": -9.793693542480469, "p10": 2.567663574218751, "median": 26.81344985961914, "p90": 63.737873077392585, "max": 94.16844940185547, "pos_frac": 0.953125, "sample": [1.0652923583984375, 39.173095703125, 19.500160217285156, 47.677345275878906, 6.7056884765625, -0.97918701171875, 33.553199768066406, 57.915069580078125, 36.40251541137695, 64.30864715576172, 4.992195129394531, 24.842021942138672, 9.389228820800781, 43.511810302734375, 69.80670166015625, 36.5333251953125, 15.600326538085938, 60.06966018676758, 94.16844940185547, -8.633148193359375, -9.793693542480469, 22.557594299316406, 22.840492248535156, 15.440498352050781, 32.77796173095703, 40.048057556152344, 31.295440673828125, 7.767255783081055, 31.726009368896484, 38.449607849121094, 43.82609558105469, 7.0017242431640625, 19.0853271484375, 79.68719482421875, 67.51653289794922, 50.43206787109375, 47.84846115112305, 3.2503814697265625, 14.177841186523438, 35.96259307861328, 22.064788818359375, 17.30721664428711, 54.13520050048828, 0.0911407470703125, 81.03179168701172, 14.537252426147461, 42.76948165893555, 18.815322875976562, 2.2750701904296875, 44.33643341064453, 32.82794952392578, 22.57024383544922, 28.081932067871094, 24.467453002929688, 23.056365966796875, 7.4793701171875, 17.29058074951172, 33.087867736816406, 1.7942390441894531, 42.811798095703125, 25.544967651367188, 76.83597564697266, 7.141931533813477, 62.40606689453125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000579.npy"}
|
|
{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 40.519554138183594, "std": 30.12722396850586, "min": -17.143766403198242, "p10": 3.5681438446044975, "median": 41.28202819824219, "p90": 67.65886688232422, "max": 145.67376708984375, "pos_frac": 0.9375, "sample": [26.308273315429688, 67.28091430664062, 20.781448364257812, 28.884498596191406, 28.561573028564453, 41.579734802246094, 63.20359802246094, 117.3607406616211, 14.771480560302734, 67.82084655761719, 47.12157440185547, 14.360763549804688, 145.67376708984375, 16.85724639892578, 10.928226470947266, 67.25436401367188, 47.139137268066406, 1.3014183044433594, 80.09905242919922, 57.337364196777344, 29.104860305786133, 60.7650146484375, 38.945152282714844, 45.57301712036133, 26.347583770751953, 48.06822967529297, 37.257720947265625, 59.44253158569336, 54.68095397949219, 9.933189392089844, 42.65547180175781, 61.61530685424805, 16.87537384033203, 0.7694454193115234, 91.43052673339844, 66.07443237304688, 21.94426727294922, -17.143766403198242, 55.80638885498047, 59.346519470214844, 50.689117431640625, 49.12237548828125, 27.549243927001953, 66.18511962890625, 10.730865478515625, 42.05836486816406, -15.445823669433594, -10.16343879699707, 16.770477294921875, 78.07460021972656, 62.193023681640625, 0.7975387573242188, 54.18293762207031, 40.98432159423828, 25.967147827148438, 17.451936721801758, 8.857170104980469, 40.606689453125, -1.302001953125, 17.492515563964844, 34.38508605957031, 91.29188537597656, 54.38726043701172, 56.266876220703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000580.npy"}
|
|
{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 31.124555587768555, "std": 24.851299285888672, "min": -12.298234939575195, "p10": 3.7582130432128906, "median": 26.301854133605957, "p90": 63.35090560913087, "max": 81.91606903076172, "pos_frac": 0.921875, "sample": [61.1529541015625, 15.904541015625, 34.85566711425781, 14.301368713378906, 45.456817626953125, 26.72713279724121, -7.756866455078125, 25.5740966796875, 1.0033950805664062, 42.717098236083984, 19.429641723632812, 50.56804656982422, 57.634918212890625, 64.28437042236328, 42.92445373535156, -7.1319122314453125, 78.55279541015625, 16.668479919433594, 81.86958312988281, 81.29298400878906, 4.696739196777344, 61.172821044921875, 10.809043884277344, 13.3367919921875, 16.244422912597656, 33.366790771484375, 40.8398551940918, 6.9217071533203125, 32.62434387207031, 23.80748748779297, -12.298234939575195, 76.41474914550781, 25.876575469970703, 30.253814697265625, 48.615875244140625, -11.848670959472656, 60.91899108886719, 59.075096130371094, -6.0829925537109375, 52.3740234375, 41.50480651855469, 29.554954528808594, 25.55693817138672, 38.397979736328125, 49.64430236816406, 13.829246520996094, 22.410720825195312, 59.28932189941406, 73.28923034667969, 5.515716552734375, 3.8405380249023438, 12.08404541015625, 15.493597030639648, 5.3148345947265625, 48.541282653808594, 24.01641082763672, 11.186405181884766, 10.11181640625, 3.722930908203125, 28.833518981933594, 19.63364601135254, 81.91606903076172, 22.188232421875, 32.946266174316406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000581.npy"}
|
|
{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 35.69976043701172, "std": 29.193857192993164, "min": -15.417526245117188, "p10": -0.13444519042968744, "median": 30.738082885742188, "p90": 77.87541580200195, "max": 106.04348754882812, "pos_frac": 0.875, "sample": [-15.417526245117188, 39.29399871826172, 60.57240295410156, 62.317142486572266, 77.86273193359375, -1.394195556640625, 54.97595977783203, 41.785797119140625, 31.324508666992188, 39.09074783325195, 12.730936050415039, 47.82637023925781, 11.6439208984375, 68.87773132324219, 16.433326721191406, 73.39147186279297, 12.45695686340332, 21.308120727539062, 51.494834899902344, 80.30536651611328, 30.802536010742188, 13.589790344238281, 68.91756439208984, 18.396347045898438, 20.675506591796875, 98.90101623535156, 23.945083618164062, 32.662498474121094, 29.748367309570312, 50.54241943359375, 30.673629760742188, 77.88085174560547, 49.70170593261719, 15.593711853027344, -5.0238037109375, 36.929935455322266, 10.763740539550781, -0.5755081176757812, 29.864593505859375, 56.65554428100586, 9.694808959960938, 17.80522918701172, 106.04348754882812, 77.88252258300781, 0.484893798828125, 62.984039306640625, -6.557960510253906, -0.08222770690917969, 11.72580337524414, 36.15778350830078, 3.5365028381347656, 81.59649658203125, 88.80642700195312, 27.289443969726562, -3.584260940551758, 0.9269447326660156, 59.18406677246094, 13.725513458251953, 32.177947998046875, 25.099349975585938, 70.99969482421875, -0.15682411193847656, 25.440994262695312, 66.07781219482422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000582.npy"}
|
|
{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 29.54582977294922, "std": 29.761417388916016, "min": -27.016159057617188, "p10": -1.468618011474609, "median": 24.281688690185547, "p90": 66.2842010498047, "max": 161.9598846435547, "pos_frac": 0.875, "sample": [28.480804443359375, 63.511199951171875, 36.87066650390625, 57.855377197265625, -10.852022171020508, 5.8112030029296875, 45.325714111328125, 41.7979621887207, 161.9598846435547, 20.851150512695312, 14.54840087890625, 20.28139305114746, 23.58354949951172, 35.814693450927734, 24.979827880859375, -1.6269607543945312, 73.81275939941406, 12.89727783203125, 54.35070037841797, -9.189292907714844, 18.578712463378906, 15.214956283569336, 28.85555648803711, -27.016159057617188, 9.815408706665039, 2.3639984130859375, 37.92140197753906, 3.5219039916992188, -22.861305236816406, 30.580745697021484, 17.38991928100586, 23.505340576171875, 45.190895080566406, 67.30960083007812, 40.411720275878906, 70.02027893066406, 33.607444763183594, 0.5143966674804688, 63.8916015625, 17.19793701171875, -3.0030517578125, 51.8599739074707, 38.04918670654297, -1.099151611328125, 59.4991455078125, 13.906356811523438, 71.34384155273438, 14.066299438476562, -5.370929718017578, 3.9783401489257812, 57.67316436767578, 26.412147521972656, 5.0936431884765625, 47.325416564941406, 20.42376708984375, 7.2555389404296875, 26.489978790283203, 14.386762619018555, 33.3310546875, 74.16085815429688, 76.41008758544922, 44.93196105957031, 18.272369384765625, 18.45770263671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000583.npy"}
|
|
{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 39.369117736816406, "std": 26.277454376220703, "min": -9.205215454101562, "p10": 8.617363739013673, "median": 34.00349426269531, "p90": 77.99864959716797, "max": 95.33646392822266, "pos_frac": 0.96875, "sample": [13.054931640625, 51.61073303222656, 20.577377319335938, 27.40282440185547, 5.690277099609375, 33.52699279785156, 45.42463684082031, 76.29351043701172, 39.892120361328125, 13.003490447998047, 11.769989013671875, 20.96080780029297, 15.019317626953125, 73.9131088256836, 57.397705078125, 80.18995666503906, 22.822933197021484, 95.33646392822266, 91.8830795288086, 0.5787887573242188, 57.39892578125, 8.594459533691406, 15.726844787597656, 31.552040100097656, 5.974763870239258, -9.205215454101562, 93.3873291015625, 33.93141174316406, 37.523193359375, 59.11524200439453, 34.00616455078125, 26.98614501953125, 17.019004821777344, -5.689460754394531, 49.25773620605469, 70.79029846191406, 34.000823974609375, 65.2655258178711, 0.7107391357421875, 26.843841552734375, 58.341636657714844, 37.86872863769531, 22.58587646484375, 89.28167724609375, 19.452213287353516, 44.34600067138672, 78.72942352294922, 47.025169372558594, 22.328842163085938, 26.060409545898438, 41.258766174316406, 52.11112976074219, 66.35164642333984, 65.92791748046875, 40.223777770996094, 74.25635528564453, 29.145750045776367, 8.670806884765625, 39.51100158691406, 51.645301818847656, 30.00871467590332, 33.835723876953125, 79.80787658691406, 11.310104370117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000584.npy"}
|
|
{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 33.05250549316406, "std": 28.828380584716797, "min": -26.50246810913086, "p10": -0.8342622756957998, "median": 30.383756637573242, "p90": 72.69863662719729, "max": 110.55718231201172, "pos_frac": 0.890625, "sample": [29.395706176757812, 19.138832092285156, 74.86255645751953, 49.23896789550781, 0.8308639526367188, 80.91217041015625, 38.31657791137695, -2.3613815307617188, 38.09486389160156, 31.884613037109375, 56.553062438964844, 27.902801513671875, 5.97564697265625, 34.34392547607422, 44.835052490234375, 56.74528884887695, 67.64949035644531, 24.333457946777344, 106.6443862915039, 60.071266174316406, 50.6572380065918, 110.55718231201172, 11.417991638183594, 42.9365234375, 9.250160217285156, 33.90076446533203, 31.532228469848633, 34.00971984863281, 95.06317901611328, 25.878753662109375, 1.0285186767578125, 20.362762451171875, 11.619560241699219, -10.3612060546875, 34.56511688232422, 35.86144256591797, 46.36305236816406, -26.50246810913086, 13.848731994628906, 83.16921997070312, 28.902786254882812, 65.32435607910156, 38.74568176269531, 0.679107666015625, 39.57086181640625, 26.203720092773438, 20.01618194580078, 2.5937957763671875, 19.614349365234375, 29.895322799682617, 0.06778907775878906, -7.319873809814453, 15.88137435913086, -2.896148681640625, 3.151611328125, 84.91143798828125, 57.63140869140625, 30.61959457397461, 61.57288360595703, -1.33392333984375, -1.220855712890625, 26.17485809326172, 30.147918701171875, 45.89958190917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000585.npy"}
|
|
{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 32.81856155395508, "std": 29.983264923095703, "min": -29.324594497680664, "p10": 0.5303192138671879, "median": 28.02629852294922, "p90": 79.1125915527344, "max": 98.31903839111328, "pos_frac": 0.90625, "sample": [98.31903839111328, 28.803855895996094, 13.45071029663086, 40.84418487548828, 39.680824279785156, 25.220869064331055, 5.5749969482421875, 24.300987243652344, 37.53083801269531, 62.60773468017578, 48.975914001464844, -2.5385208129882812, 31.857070922851562, 86.50299072265625, -7.941583633422852, 22.301532745361328, 30.27655792236328, -24.846357345581055, 31.400650024414062, 12.142675399780273, 14.497163772583008, 15.779748916625977, 42.60021209716797, 68.05400085449219, 65.41996765136719, -22.871170043945312, 44.373634338378906, 17.33307647705078, 8.924400329589844, 20.598907470703125, 67.41313171386719, 27.011497497558594, 46.03749084472656, 15.913719177246094, 15.027006149291992, 18.875396728515625, 45.518280029296875, -3.4667434692382812, 4.852455139160156, 40.46446228027344, 92.92599487304688, 11.1746826171875, 83.99687194824219, 41.89691162109375, 0.9389266967773438, -29.324594497680664, 27.248741149902344, 42.36798095703125, 81.62953186035156, 15.268730163574219, 0.35520172119140625, 10.436424255371094, 10.467002868652344, 96.48014831542969, 83.20854949951172, 31.888519287109375, 73.23973083496094, 71.64840698242188, 5.432655334472656, 10.646827697753906, 42.39500427246094, 73.092529296875, 27.0009765625, 39.15058898925781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000586.npy"}
|
|
{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 30.581205368041992, "std": 27.596582412719727, "min": -15.604284286499023, "p10": -0.7595949172973632, "median": 25.80141258239746, "p90": 68.98250579833986, "max": 99.910400390625, "pos_frac": 0.875, "sample": [38.50349044799805, 62.854522705078125, 25.200889587402344, -2.881786346435547, 59.22441864013672, 1.1770668029785156, 15.78924560546875, 3.8502159118652344, 1.6703853607177734, 36.593177795410156, 9.74453353881836, 44.250160217285156, 31.4708251953125, 4.373292922973633, 32.19746780395508, 42.97876739501953, 1.3969001770019531, 34.78291320800781, 18.323043823242188, 15.104938507080078, 1.7452774047851562, 22.288909912109375, 56.50956726074219, 8.735553741455078, 49.70497131347656, 99.910400390625, -1.0286197662353516, 81.81370544433594, 35.97405242919922, 10.549201965332031, 13.006265640258789, 31.715797424316406, 0.6088485717773438, 24.320877075195312, 64.45268249511719, 26.401935577392578, 6.846595764160156, 12.438941955566406, 7.7163848876953125, -2.4179458618164062, -0.6443386077880859, 57.680511474609375, -15.604284286499023, 77.5252685546875, 20.710481643676758, 76.79899597167969, 1.9376029968261719, -2.820995330810547, 46.87062072753906, 54.375457763671875, 70.92385864257812, 88.78900146484375, 51.886451721191406, 55.19872283935547, 60.076934814453125, 41.666748046875, 82.43949890136719, 21.46044921875, 29.204254150390625, -1.2178211212158203, -0.808990478515625, 11.261207580566406, 51.96764373779297, 49.62190246582031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000587.npy"}
|
|
{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 32.04176330566406, "std": 28.54863929748535, "min": -15.686531066894531, "p10": -0.9694806098937987, "median": 27.937307357788086, "p90": 80.6527755737305, "max": 103.32987976074219, "pos_frac": 0.875, "sample": [31.195886611938477, 19.868560791015625, 10.7421875, 20.62070083618164, 41.446990966796875, 58.550872802734375, 37.84598159790039, 23.435422897338867, 33.906402587890625, 56.93330383300781, -15.686531066894531, 21.196510314941406, 8.196125030517578, 6.796113967895508, -5.178886413574219, 36.45833206176758, 6.3531036376953125, 51.55815887451172, -1.1775894165039062, 24.263320922851562, 34.79705810546875, 26.772201538085938, -13.926856994628906, 51.37837219238281, 2.0860443115234375, 15.40423583984375, 86.14175415039062, 21.850337982177734, 14.255157470703125, 14.633827209472656, 36.76939010620117, 3.020803451538086, 12.545364379882812, 84.73355102539062, 53.16007995605469, 6.83326530456543, 67.67726135253906, 14.689216613769531, 17.86246109008789, 67.63256072998047, 83.58330535888672, 34.241939544677734, 12.337631225585938, 35.01495361328125, 55.3040771484375, -7.837059020996094, 16.14508819580078, 85.20172119140625, -5.135009765625, 64.57546997070312, -1.0401973724365234, 84.01055908203125, 85.81671142578125, 38.384613037109375, 73.81487274169922, 41.55164337158203, 29.47727394104004, 15.717174530029297, -0.8044748306274414, 55.443809509277344, 35.325355529785156, 1.4702682495117188, 103.32987976074219, 29.102413177490234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000588.npy"}
|
|
{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 44.363983154296875, "std": 33.08956527709961, "min": -21.234203338623047, "p10": 3.367512512207033, "median": 42.10517883300781, "p90": 94.13206253051759, "max": 133.8654022216797, "pos_frac": 0.921875, "sample": [71.2660140991211, 56.591514587402344, 41.76243591308594, -2.396343231201172, 38.041473388671875, -3.5407180786132812, 11.195423126220703, 94.60340118408203, 43.00630187988281, 21.468666076660156, 31.603912353515625, 22.879135131835938, 49.277687072753906, 10.576286315917969, 18.382408142089844, 116.78683471679688, 57.292327880859375, -7.921600341796875, 26.822280883789062, 44.3276252746582, 70.39220428466797, 21.34259033203125, -6.45111083984375, 101.60272979736328, 27.200836181640625, 15.631240844726562, 35.987823486328125, 30.15850830078125, 25.941238403320312, 21.43699836730957, 113.25706481933594, 70.72862243652344, 56.690223693847656, 55.54195022583008, 93.03227233886719, 36.12492370605469, 44.007080078125, 16.22570037841797, 63.308189392089844, 2.68658447265625, 69.59013366699219, 4.9563446044921875, 48.00331115722656, 42.44792175292969, 58.34016036987305, 32.61220169067383, 112.97628784179688, 22.308563232421875, 64.79576873779297, 133.8654022216797, 111.51055908203125, 42.65809631347656, 38.566917419433594, 38.842559814453125, 21.307144165039062, 30.41885757446289, 70.54997253417969, 48.017120361328125, 2.334430694580078, 53.296234130859375, 67.71184539794922, 44.64720153808594, -21.234203338623047, 63.901466369628906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000589.npy"}
|
|
{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 38.70075607299805, "std": 28.155410766601562, "min": -14.5260009765625, "p10": 8.757490539550782, "median": 35.202884674072266, "p90": 84.04414978027344, "max": 111.41190338134766, "pos_frac": 0.96875, "sample": [8.221893310546875, 111.41190338134766, 64.25003051757812, 39.379085540771484, 21.42511749267578, 9.929115295410156, 42.70819091796875, 10.52471923828125, 67.2286605834961, 22.96160888671875, 10.387163162231445, 7.060420989990234, 14.96533203125, 72.96366882324219, 82.91470336914062, 23.505496978759766, 13.35396957397461, -6.042877197265625, 34.79646301269531, 37.27685546875, 36.86996078491211, 18.35301971435547, 17.451614379882812, 34.45115661621094, 16.706764221191406, 9.335319519042969, 62.095436096191406, 39.471290588378906, 87.42034912109375, 7.980556488037109, -14.5260009765625, 57.362274169921875, 30.112918853759766, 89.26422119140625, 42.15789794921875, 59.313621520996094, 41.35194396972656, 54.77593994140625, 19.237224578857422, 14.69588851928711, 20.05294418334961, 38.724727630615234, 15.455106735229492, 5.3988800048828125, 26.923919677734375, 13.823806762695312, 32.8624267578125, 8.509849548339844, 20.894073486328125, 86.38883209228516, 110.30488586425781, 56.505882263183594, 35.3177490234375, 45.43733215332031, 35.753204345703125, 58.23271179199219, 39.83876037597656, 87.4273681640625, 59.57818603515625, 84.5281982421875, 62.08296203613281, 35.08802032470703, 17.481914520263672, 67.13384246826172], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000590.npy"}
|
|
{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 36.54948425292969, "std": 25.83894157409668, "min": -17.789215087890625, "p10": 5.315490722656251, "median": 38.88365936279297, "p90": 66.78363342285158, "max": 95.29208374023438, "pos_frac": 0.90625, "sample": [6.39129638671875, 95.29208374023438, 84.81503295898438, 55.43115997314453, 70.0567626953125, 26.330825805664062, 49.006935119628906, 15.801704406738281, 33.060707092285156, 85.97422790527344, 25.03851318359375, 35.76232147216797, 55.63414764404297, 22.238258361816406, 50.901611328125, 60.32355499267578, 63.51764678955078, 55.27734375, 47.59584045410156, 47.78538513183594, 18.001708984375, 56.901092529296875, -1.709207534790039, 13.482986450195312, 52.47785949707031, 18.337379455566406, 13.904451370239258, 63.0736083984375, 50.69691467285156, 24.352340698242188, -15.10307502746582, 39.181793212890625, 57.2507438659668, -10.60150146484375, 19.24592399597168, 21.332191467285156, 4.85443115234375, 11.402250289916992, 58.51176452636719, 10.88502311706543, -17.789215087890625, 53.01171112060547, 51.03097915649414, 54.178932189941406, 14.622276306152344, 48.3646240234375, 69.56891632080078, 13.379589080810547, 29.175960540771484, 38.58552551269531, 68.18334197998047, 40.022926330566406, 44.210899353027344, -1.699859619140625, 62.73553466796875, 36.26154327392578, 73.9940185546875, -13.568260192871094, 40.77324676513672, 25.023704528808594, 13.682907104492188, 9.346000671386719, 58.35105895996094, 35.006500244140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000591.npy"}
|
|
{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 35.76453399658203, "std": 29.050722122192383, "min": -38.77819061279297, "p10": -1.3484394073486323, "median": 41.81907272338867, "p90": 66.41814804077148, "max": 121.98455810546875, "pos_frac": 0.859375, "sample": [64.75821685791016, 76.87496948242188, 9.557584762573242, 56.89288330078125, -1.5548553466796875, -0.45538330078125, 42.43981170654297, 43.75212097167969, 83.78643798828125, 52.78429412841797, 20.378448486328125, 49.50031280517578, -0.8668022155761719, 43.25944519042969, -13.614044189453125, 32.96418380737305, 66.95012664794922, 43.74791717529297, 46.254539489746094, 121.98455810546875, 32.81481170654297, 24.934680938720703, 13.849647521972656, 41.198333740234375, -5.381704330444336, 6.843019485473633, 15.545291900634766, 75.4185791015625, 39.02894973754883, 60.8376350402832, 15.27164077758789, 7.574855804443359, 30.815597534179688, 47.78053283691406, 69.74111938476562, 5.932365417480469, 45.93196105957031, 27.304603576660156, 6.5628204345703125, 65.17686462402344, 54.9718017578125, 30.638755798339844, 53.12439727783203, 41.162200927734375, 12.413330078125, -20.840179443359375, 31.966964721679688, -29.53992462158203, 53.02655029296875, 62.747215270996094, 54.11039733886719, 48.10215759277344, 47.398216247558594, 45.20733642578125, -11.004467010498047, 57.899864196777344, 35.15742492675781, 31.00402069091797, 75.4734878540039, 44.37406921386719, -38.77819061279297, 48.2935791015625, 39.37806701660156, 56.06682586669922], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000592.npy"}
|
|
{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 29.934890747070312, "std": 28.498645782470703, "min": -28.137195587158203, "p10": -5.1270341873168945, "median": 27.45668125152588, "p90": 65.5358154296875, "max": 103.80619812011719, "pos_frac": 0.828125, "sample": [1.3456954956054688, -2.57366943359375, -1.419647216796875, -10.355865478515625, 15.218109130859375, 21.035751342773438, 39.83082580566406, 13.44961929321289, 54.802330017089844, 63.202362060546875, 24.54869842529297, 71.77754211425781, 24.3436279296875, 5.818572998046875, 8.024978637695312, 13.686960220336914, 103.2721939086914, 66.45137023925781, 38.54736328125, 35.585811614990234, 48.612613677978516, 28.811630249023438, 33.14155578613281, 25.89777374267578, 38.17581558227539, 103.80619812011719, 28.81653594970703, 47.845916748046875, 53.13917541503906, 12.678956985473633, -4.847675323486328, 60.900203704833984, 9.479696273803711, -2.9861793518066406, 36.22193145751953, 87.58817291259766, 9.161163330078125, 44.34138488769531, 9.442649841308594, 14.740299224853516, -9.191360473632812, 56.17158508300781, -24.02617645263672, 30.772491455078125, 26.10173225402832, 17.616485595703125, 16.993654251098633, 44.527740478515625, 73.68574523925781, 46.65681457519531, 14.4139404296875, 25.813926696777344, 72.021240234375, 53.26123809814453, 48.50177764892578, 37.087867736816406, 63.39952087402344, -5.246759414672852, -7.524370193481445, 34.41278839111328, -9.062324523925781, -28.137195587158203, 22.177989959716797, 43.8442268371582], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000593.npy"}
|
|
{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 39.89767837524414, "std": 35.56900405883789, "min": -43.52124786376953, "p10": 2.287495040893556, "median": 32.52593231201172, "p90": 93.06329956054691, "max": 123.5042724609375, "pos_frac": 0.921875, "sample": [75.03778076171875, 1.772857666015625, 4.047513961791992, 1.4643611907958984, 11.004631042480469, 22.725868225097656, 96.6720199584961, 26.496700286865234, 24.213275909423828, 3.4883155822753906, 26.333560943603516, 78.46131134033203, 83.70964050292969, 69.49751281738281, 27.34078598022461, 20.3016357421875, 67.83544921875, 34.80656433105469, 5.3207550048828125, -14.489139556884766, 46.617103576660156, 22.477508544921875, -0.8627166748046875, 84.64295196533203, 11.98086929321289, 65.56695556640625, -12.347015380859375, 55.588844299316406, 57.15023422241211, 37.75062561035156, 15.401412963867188, 56.6746826171875, 52.221099853515625, 47.254417419433594, 67.27723693847656, 49.00244903564453, 26.797527313232422, -1.2255439758300781, 32.926780700683594, -43.52124786376953, 7.873989105224609, 99.28671264648438, 37.017730712890625, 58.29230499267578, 32.125083923339844, 96.780517578125, 81.11680603027344, 59.402130126953125, 30.841781616210938, 28.149295806884766, 12.581169128417969, 5.9188079833984375, 36.75345230102539, 10.252922058105469, 108.80105590820312, 5.527591705322266, 46.12986755371094, 114.28829956054688, 118.8553466796875, 24.68657875061035, 123.5042724609375, 9.117067337036133, 15.959487915039062, 52.773643493652344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000594.npy"}
|
|
{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 32.23430633544922, "std": 27.126323699951172, "min": -6.165813446044922, "p10": -0.36096076965332013, "median": 26.151134490966797, "p90": 69.54967041015627, "max": 124.7703857421875, "pos_frac": 0.875, "sample": [-3.4425621032714844, 28.971282958984375, 22.557048797607422, 27.135793685913086, -2.109973907470703, 70.70005798339844, 15.703813552856445, 56.83185577392578, -0.6159172058105469, 32.40089416503906, 24.448287963867188, 25.418087005615234, -0.1694049835205078, 66.86543273925781, 4.416664123535156, 48.03999328613281, 24.69322395324707, 124.7703857421875, 46.25336456298828, 23.683597564697266, 11.403865814208984, 17.738300323486328, 26.581687927246094, 20.04094696044922, 11.660598754882812, 13.03708267211914, 20.29971694946289, 28.026391983032227, 32.04512023925781, 54.00879669189453, -0.4430561065673828, 34.792579650878906, 39.55809783935547, 25.7205810546875, 72.29046630859375, 47.07508087158203, 25.022552490234375, 39.78791427612305, 9.836509704589844, 64.21726989746094, 15.310333251953125, 37.44926834106445, 39.4287109375, -3.7113418579101562, -1.1514739990234375, 38.51613235473633, 20.539691925048828, 4.006462097167969, 34.82410430908203, 64.25530242919922, 11.039703369140625, 98.2340087890625, -6.165813446044922, 34.93373107910156, 23.27634048461914, 35.88240051269531, 44.31049346923828, 14.375396728515625, 15.087448120117188, 2.2027854919433594, 77.02146911621094, 54.8012809753418, 89.0314712524414, 90.245361328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000595.npy"}
|
|
{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 34.995357513427734, "std": 30.359037399291992, "min": -25.180879592895508, "p10": -1.2824281692504873, "median": 32.723670959472656, "p90": 77.81425094604492, "max": 111.18344116210938, "pos_frac": 0.875, "sample": [-7.119140625, -1.7886962890625, 60.328006744384766, -5.627403259277344, 27.89263916015625, 77.59355163574219, 2.2541885375976562, 69.2437744140625, 32.938720703125, -0.25730323791503906, 77.9088363647461, 62.866912841796875, 13.130752563476562, 62.09300994873047, 53.86530303955078, 82.1135025024414, 29.42436981201172, 17.621280670166016, 57.17272186279297, 76.73582458496094, 2.259571075439453, 81.4970703125, 23.63863754272461, 25.66356658935547, 26.18860626220703, 106.83212280273438, -3.992433547973633, 34.061431884765625, 48.09088897705078, 5.174476623535156, 35.19280242919922, 20.813669204711914, 11.648277282714844, 88.77093505859375, 7.337747573852539, 15.774505615234375, 64.7754135131836, 43.82014846801758, 54.21266174316406, -1.7217674255371094, 39.040199279785156, 7.426664352416992, 43.82605743408203, 22.984939575195312, 36.354644775390625, 72.80015563964844, 33.48731994628906, 25.479080200195312, 32.50862121582031, 40.89616394042969, 11.468246459960938, 5.546442031860352, -8.369792938232422, 57.858612060546875, 12.253952026367188, -25.180879592895508, 35.75738525390625, 16.93536376953125, 42.448875427246094, 111.18344116210938, 82.29814147949219, 2.1825618743896484, 46.120826721191406, 15.966764450073242], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000596.npy"}
|
|
{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 35.32228469848633, "std": 31.88015365600586, "min": -21.089923858642578, "p10": -2.174057388305664, "median": 29.974140167236328, "p90": 75.38216476440431, "max": 119.52737426757812, "pos_frac": 0.859375, "sample": [25.47442626953125, 33.90740203857422, 29.605453491210938, 41.71091079711914, 39.3201904296875, 119.52737426757812, 6.170093536376953, 48.135887145996094, 21.46938705444336, 31.4545955657959, 11.538360595703125, 20.669715881347656, 9.842185974121094, -1.9275550842285156, 30.274459838867188, -4.025276184082031, 26.206363677978516, 36.244873046875, 52.851165771484375, 96.99240112304688, 16.427217483520508, 20.461257934570312, 9.924331665039062, 59.383697509765625, 70.7820053100586, 12.834342956542969, -4.6646270751953125, 4.806205749511719, 60.8980712890625, 11.119491577148438, -13.413780212402344, -16.493207931518555, 46.943511962890625, 22.75897216796875, 29.67382049560547, 79.05339050292969, 35.88275146484375, -1.5731887817382812, -12.469465255737305, 21.716407775878906, 71.54598999023438, 65.67143249511719, 2.910430908203125, 71.3884048461914, 3.9452896118164062, 51.63746643066406, 44.764713287353516, 58.309906005859375, 28.34368896484375, 76.85539245605469, 71.94463348388672, -2.2797012329101562, -21.089923858642578, 109.94485473632812, 63.8565673828125, 7.352714538574219, 88.30950164794922, 20.934906005859375, 63.128814697265625, 68.98904418945312, 84.91926574707031, 30.93193817138672, 14.829055786132812, 53.98817443847656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000597.npy"}
|
|
{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 32.456298828125, "std": 25.231464385986328, "min": -33.41507339477539, "p10": 7.427731132507325, "median": 32.41542434692383, "p90": 62.44279937744141, "max": 94.62224578857422, "pos_frac": 0.921875, "sample": [66.09819030761719, 80.36026000976562, 16.079120635986328, 55.319068908691406, 13.653030395507812, 60.879730224609375, 52.30989074707031, 54.256248474121094, 9.451099395751953, 20.74938201904297, 18.884727478027344, -6.449008941650391, -33.41507339477539, 36.012306213378906, 56.76158142089844, -9.683610916137695, 53.79810333251953, 22.266677856445312, 45.64982986450195, 20.670835494995117, 14.002296447753906, 33.179237365722656, 81.43327331542969, 30.145549774169922, 50.75498962402344, 35.760902404785156, 79.83354187011719, 33.72811508178711, -15.586051940917969, 7.156637191772461, 9.74078369140625, 37.71784210205078, 13.389150619506836, 15.3475341796875, 8.060283660888672, 50.09294509887695, 15.356563568115234, 35.008941650390625, 15.961074829101562, 40.03448486328125, 21.908836364746094, 28.89270782470703, 2.6153793334960938, 61.072845458984375, -7.832820892333984, 57.27449035644531, 35.33148956298828, 10.033731460571289, 14.563837051391602, 41.19251251220703, 18.35118865966797, 31.651611328125, 94.62224578857422, 51.79002380371094, 36.160614013671875, 9.105661392211914, 53.2237548828125, 63.02992248535156, 54.77180480957031, 41.6988525390625, 28.21075439453125, 17.784536361694336, 67.13439178466797, 19.84427261352539], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000598.npy"}
|
|
{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 32.661949157714844, "std": 25.800249099731445, "min": -13.721504211425781, "p10": -0.809971046447752, "median": 31.273252487182617, "p90": 69.87661743164062, "max": 83.75395202636719, "pos_frac": 0.890625, "sample": [48.22063446044922, 1.801483154296875, 26.041168212890625, -8.793586730957031, 21.749229431152344, 79.41790771484375, 54.93902587890625, 35.2022705078125, 50.408180236816406, 28.297470092773438, 71.11836242675781, 71.59242248535156, 6.569616317749023, 80.29075622558594, 1.111825942993164, 26.072158813476562, 83.75395202636719, 34.53150177001953, 18.586700439453125, 32.668212890625, 15.045318603515625, 48.91089630126953, 44.49285125732422, -5.803958892822266, 39.610443115234375, 4.094911575317383, 32.2958984375, 52.09524917602539, 1.9711265563964844, 25.677452087402344, 69.10394287109375, -9.970169067382812, 51.249961853027344, 47.82270812988281, 41.70024108886719, 83.1680679321289, 53.58038330078125, 19.331214904785156, 61.11042022705078, 10.781166076660156, 55.07611846923828, 17.753082275390625, 70.207763671875, 51.67856216430664, 28.692916870117188, 14.531471252441406, 52.851661682128906, -13.721504211425781, 45.032936096191406, 21.895736694335938, 23.5828857421875, 3.3974132537841797, -3.7478103637695312, -1.6335983276367188, 17.38592529296875, 39.499298095703125, 47.81296157836914, 22.923492431640625, 45.492958068847656, 66.05470275878906, 4.2212982177734375, -8.466251373291016, 30.250606536865234, 9.744739532470703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000599.npy"}
|
|
{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 30.076370239257812, "std": 25.40629768371582, "min": -29.105804443359375, "p10": 6.32385540008545, "median": 27.786548614501953, "p90": 68.98934631347657, "max": 103.53346252441406, "pos_frac": 0.921875, "sample": [24.7930850982666, 12.828899383544922, 37.0179443359375, -24.881027221679688, 55.80451202392578, 13.215133666992188, 30.103878021240234, 12.218246459960938, 7.007081985473633, 54.51679229736328, 17.477012634277344, 25.83094024658203, 41.32173156738281, 11.620641708374023, 38.20699691772461, 74.97270202636719, 11.88101577758789, 18.06958770751953, 15.242504119873047, 11.446224212646484, 52.381439208984375, 75.38541412353516, 18.486892700195312, 6.027929306030273, 18.60296630859375, 36.5919189453125, 21.04686737060547, 103.53346252441406, 81.63162231445312, 30.656848907470703, 19.05796241760254, 64.92825317382812, 70.1064453125, 30.4520206451416, 66.38278198242188, 6.031044006347656, -12.89874267578125, 30.9161376953125, -9.136819839477539, 35.870849609375, 41.83038330078125, 42.85975646972656, 20.75341796875, 23.981491088867188, 55.71528625488281, 16.107765197753906, 35.25088882446289, 34.0533561706543, 10.183197021484375, 41.54798889160156, 16.957847595214844, 41.56836700439453, 74.96381378173828, 42.834869384765625, 36.37432098388672, 73.07698059082031, -29.105804443359375, 29.742156982421875, 14.749471664428711, -2.4287185668945312, 17.37078857421875, 10.731834411621094, 10.60799789428711, 30.410940170288086], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000600.npy"}
|
|
{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 34.12665939331055, "std": 29.32499885559082, "min": -24.4970703125, "p10": -1.2176856994628893, "median": 30.437349319458008, "p90": 69.75142822265626, "max": 114.76986694335938, "pos_frac": 0.890625, "sample": [3.7620620727539062, 18.405471801757812, 114.76986694335938, 62.29195785522461, 9.011078834533691, 64.53580474853516, 33.12590026855469, 27.053680419921875, 30.752925872802734, 55.864845275878906, 24.488365173339844, 9.279434204101562, 67.53414916992188, 24.070724487304688, 50.843292236328125, 27.293006896972656, 20.29453468322754, 55.81732177734375, 24.119972229003906, 61.759788513183594, 27.252792358398438, 72.23069763183594, -24.4970703125, 29.84466552734375, 27.712120056152344, 53.28388595581055, 1.2150020599365234, 37.1907958984375, 62.643775939941406, 34.031105041503906, 7.947715759277344, 38.76466369628906, -4.712127685546875, 8.07968521118164, 26.723220825195312, 70.70169067382812, 13.929862976074219, 72.15985870361328, 31.033294677734375, 25.117538452148438, 0.187286376953125, 30.12177276611328, -1.8198165893554688, -6.8827362060546875, 26.891143798828125, -16.54458999633789, 2.422515869140625, 51.48710632324219, 62.754249572753906, -13.326667785644531, 39.43742370605469, 76.78056335449219, 34.82609558105469, 43.80656433105469, -20.675296783447266, 108.00767517089844, 28.623350143432617, 40.60894775390625, 37.1129150390625, 60.066192626953125, 96.43248748779297, 11.063934326171875, 54.741424560546875, 42.256317138671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000601.npy"}
|
|
{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 37.70860290527344, "std": 28.099163055419922, "min": -14.713577270507812, "p10": 2.2413087844848634, "median": 36.126731872558594, "p90": 75.73837509155275, "max": 131.0216522216797, "pos_frac": 0.921875, "sample": [44.640533447265625, 15.379325866699219, -1.97296142578125, 8.036182403564453, 28.100187301635742, 41.58158874511719, 29.48340606689453, 60.094696044921875, 2.2260589599609375, 25.73461151123047, 54.045921325683594, 37.07283020019531, 21.542436599731445, 30.61760711669922, 82.02513122558594, 20.02304458618164, 71.76248168945312, 68.88758850097656, 77.44232940673828, 39.25389099121094, 30.891510009765625, 19.295368194580078, -4.9249267578125, 7.791191101074219, 2.2141590118408203, 56.823394775390625, 53.47917938232422, 10.359630584716797, -1.860198974609375, 2.2768917083740234, 8.407066345214844, 33.50921630859375, 53.038665771484375, 23.080501556396484, 40.02393341064453, 68.4837646484375, 12.195693969726562, 38.00135040283203, 26.550086975097656, 4.717578887939453, 94.20051574707031, 56.78590393066406, 81.21250915527344, 35.180633544921875, 34.93971252441406, 79.08866882324219, 131.0216522216797, -11.6102294921875, 47.81700897216797, 18.87921142578125, 63.04985809326172, 49.59544372558594, 57.50450897216797, 43.140159606933594, 61.7091064453125, 39.68592834472656, -14.713577270507812, 17.125877380371094, 32.76074981689453, 39.50608825683594, 63.297119140625, 78.91370391845703, 43.73876953125, 30.190231323242188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000602.npy"}
|
|
{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 32.04313659667969, "std": 26.283693313598633, "min": -14.387336730957031, "p10": 0.5817659378051763, "median": 32.51904296875, "p90": 66.30594940185547, "max": 95.30891418457031, "pos_frac": 0.90625, "sample": [9.16522216796875, 25.37274742126465, 43.57965087890625, 1.0159320831298828, 95.30891418457031, 18.660430908203125, 62.46348571777344, 60.211219787597656, 23.506179809570312, 66.71331787109375, 35.49022674560547, 43.235939025878906, -11.655136108398438, 74.65619659423828, 9.600906372070312, 38.352508544921875, 18.263275146484375, 56.30403137207031, 14.1605224609375, 37.90293884277344, 65.19720458984375, 26.601036071777344, 35.784400939941406, 12.023475646972656, 21.869842529296875, 38.30217742919922, 69.68780517578125, 36.430545806884766, 44.50645446777344, 25.53313446044922, 38.34599685668945, -11.371570587158203, 48.13377380371094, -14.387336730957031, 24.904518127441406, 65.35542297363281, 84.30320739746094, 1.5115509033203125, 0.3956947326660156, 19.810333251953125, 44.644622802734375, 39.83153533935547, 1.5325241088867188, 8.366024017333984, -2.3570213317871094, 24.408340454101562, 8.576797485351562, 14.399377822875977, 4.5867919921875, 49.10345458984375, -11.112096786499023, 52.442176818847656, 29.54785919189453, 10.4888916015625, 42.49042892456055, -4.9643402099609375, 48.6978759765625, 48.891456604003906, 57.994407653808594, 13.586669921875, 76.34245300292969, 46.7260627746582, 84.30973815917969, 6.980587005615234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000603.npy"}
|
|
{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 35.393802642822266, "std": 29.887229919433594, "min": -26.029582977294922, "p10": 2.5117891311645537, "median": 32.554237365722656, "p90": 71.26539535522463, "max": 118.86788940429688, "pos_frac": 0.90625, "sample": [18.201574325561523, 9.363059997558594, 27.09471893310547, 1.2892608642578125, 67.32255554199219, 44.06249237060547, 50.38340759277344, 28.250320434570312, 8.351783752441406, 15.190681457519531, -7.095367431640625, 72.48963165283203, 17.05475616455078, -6.930931091308594, 61.44618225097656, 26.456817626953125, 35.157989501953125, 5.364355087280273, 49.31262969970703, 63.92451477050781, 34.42412185668945, -0.9449005126953125, 6.431800842285156, 12.267173767089844, -7.538320541381836, -26.029582977294922, 36.16417694091797, 8.985164642333984, 64.0881118774414, 13.1942138671875, 9.757240295410156, 29.403839111328125, 18.88855743408203, 42.03437042236328, 49.21574783325195, 68.40884399414062, 26.110137939453125, 7.940803527832031, 80.50869750976562, 14.0552978515625, 37.141727447509766, 32.06212615966797, 68.2630386352539, -21.639434814453125, 74.66512298583984, 84.9442138671875, 53.589256286621094, 118.86788940429688, 39.08287811279297, 36.917449951171875, 39.16064453125, 47.866859436035156, 113.63299560546875, 28.267833709716797, 50.208885192871094, 46.81093215942383, 30.402793884277344, 13.862548828125, 59.064273834228516, 31.85382843017578, 33.046348571777344, 29.713546752929688, 106.64500427246094, 36.71664810180664], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000604.npy"}
|
|
{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 31.686960220336914, "std": 30.38180923461914, "min": -27.89897918701172, "p10": 3.4804565429687506, "median": 26.019160270690918, "p90": 69.82191925048829, "max": 156.74188232421875, "pos_frac": 0.953125, "sample": [19.56053924560547, 28.291046142578125, 8.016876220703125, 23.27449607849121, 44.583961486816406, 25.091598510742188, 18.155534744262695, 67.01567077636719, 75.16107940673828, 17.047443389892578, 35.988525390625, 13.713165283203125, 10.278837203979492, 20.52655792236328, 29.047046661376953, 57.52874755859375, 46.29804229736328, 81.283203125, 71.02459716796875, 39.923255920410156, 156.74188232421875, 16.066604614257812, 3.1842193603515625, 48.86293029785156, 31.897377014160156, 13.678329467773438, 39.133209228515625, 11.115287780761719, -27.89897918701172, 2.4475021362304688, 18.596328735351562, 2.1431846618652344, 44.776145935058594, -24.780487060546875, 2.968372344970703, 20.436004638671875, 108.19694519042969, 31.447050094604492, 4.1716766357421875, 95.15560913085938, 32.253360748291016, 102.61080932617188, 20.294790267944336, 7.534526824951172, 23.668701171875, 26.94672203063965, 11.97274398803711, 15.85776138305664, 7.277580261230469, 19.430246353149414, 20.223915100097656, 41.36688232421875, 27.221824645996094, 51.69105529785156, -5.111490249633789, 23.084672927856445, 46.752235412597656, 8.882259368896484, 36.5297966003418, 27.305709838867188, 28.52825927734375, 32.94408416748047, 45.090049743652344, 45.45953369140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000605.npy"}
|
|
{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 36.67774963378906, "std": 26.492900848388672, "min": -3.8375320434570312, "p10": 5.680474472045899, "median": 36.77361488342285, "p90": 70.83138427734376, "max": 123.25448608398438, "pos_frac": 0.96875, "sample": [-0.9348793029785156, 4.040679931640625, 52.04517364501953, 10.99897575378418, 59.847572326660156, 56.28143310546875, 9.31039047241211, 14.160629272460938, 69.81092834472656, 66.04307556152344, 26.61341094970703, 123.25448608398438, 39.006988525390625, -3.8375320434570312, 15.7664794921875, 42.75115966796875, 14.253547668457031, 5.547344207763672, 66.71092224121094, 53.56865692138672, 50.37080001831055, 15.32927131652832, 50.10862731933594, 0.59637451171875, 77.46087646484375, 89.67227172851562, 13.256988525390625, 9.966033935546875, 13.747777938842773, 31.857284545898438, 14.542404174804688, 31.476112365722656, 13.129789352416992, 12.553787231445312, 1.0646743774414062, 4.114189147949219, 41.77317810058594, 63.11565399169922, 6.965982437133789, 9.656890869140625, 39.81366729736328, 50.514686584472656, 71.74560546875, 51.971290588378906, 47.96429443359375, 50.7138671875, 62.03748321533203, 44.84642028808594, 71.26872253417969, 20.284954071044922, 23.230667114257812, 54.918907165527344, 21.031112670898438, 42.54792785644531, 34.54024124145508, 65.9830322265625, 39.5592041015625, 30.088584899902344, 25.055908203125, 78.061767578125, 72.41902160644531, 20.336212158203125, 46.45271301269531, 5.991111755371094], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000606.npy"}
|
|
{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 32.44065856933594, "std": 23.698347091674805, "min": -10.40713119506836, "p10": 4.845211219787598, "median": 31.25627326965332, "p90": 61.344248962402354, "max": 100.00485229492188, "pos_frac": 0.953125, "sample": [58.00492858886719, 70.0317611694336, 50.49311828613281, 11.095037460327148, 40.17594909667969, 32.71983337402344, 24.830184936523438, 100.00485229492188, 17.063697814941406, 37.17278289794922, -10.40713119506836, 37.267356872558594, 31.064620971679688, 7.130195617675781, 33.8835563659668, 96.81060791015625, 7.957256317138672, 28.92887306213379, 18.52685546875, 22.292085647583008, 10.387344360351562, 4.813295364379883, 54.296485900878906, 19.586883544921875, 4.919681549072266, 51.63328552246094, 4.180694580078125, 31.447925567626953, 14.96929931640625, 37.26868438720703, 44.84303283691406, 30.570159912109375, 70.5453109741211, 32.247802734375, 51.126991271972656, 41.910545349121094, 27.88409423828125, 2.506744384765625, 44.55385971069336, 39.9028434753418, 12.726791381835938, 74.45989990234375, 4.30401611328125, 62.039207458496094, 16.06249237060547, 10.725566864013672, 13.556037902832031, 40.738059997558594, 50.88667297363281, 53.59880828857422, 19.95196533203125, 44.506805419921875, 30.103378295898438, 32.46833038330078, -7.7268829345703125, 19.982025146484375, 52.47504425048828, 74.41314697265625, 10.103607177734375, 37.510406494140625, 11.599140167236328, -6.108325958251953, 59.722679138183594, 23.461843490600586], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000607.npy"}
|
|
{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 33.237815856933594, "std": 26.6564884185791, "min": -25.257266998291016, "p10": 0.7183568954467783, "median": 33.07780075073242, "p90": 73.3316246032715, "max": 85.30990600585938, "pos_frac": 0.90625, "sample": [83.35868072509766, 24.413429260253906, 14.718513488769531, 1.5874595642089844, 29.019371032714844, 56.55963897705078, 28.96075439453125, 35.144466400146484, 46.6161003112793, 51.90650939941406, 55.516578674316406, 0.3458843231201172, 34.72418975830078, 39.30885314941406, -4.1858062744140625, 16.585805892944336, -5.152923583984375, 59.8511962890625, 8.957832336425781, 40.57539367675781, 71.15837860107422, 38.059600830078125, 20.397491455078125, 19.152015686035156, -15.659149169921875, 19.757518768310547, 18.52294921875, 41.195579528808594, 67.05799865722656, 4.919303894042969, 50.03671646118164, 6.6072540283203125, 53.33872985839844, 5.869499206542969, 30.44317626953125, 41.3280029296875, 19.418807983398438, -20.204010009765625, 41.354888916015625, -2.2163848876953125, 75.81419372558594, 42.19859313964844, 85.30990600585938, 36.952545166015625, 14.750541687011719, 16.732376098632812, 6.4394989013671875, 34.12297058105469, 47.72673797607422, 9.999271392822266, 57.40034484863281, 12.280731201171875, 27.405685424804688, 61.48651123046875, -25.257266998291016, 29.142578125, 17.291828155517578, 49.875396728515625, 32.032630920410156, 83.8514404296875, 74.26301574707031, 76.68572998046875, 52.752410888671875, 78.61231231689453], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000608.npy"}
|
|
{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 33.63862228393555, "std": 23.901885986328125, "min": -16.501113891601562, "p10": 3.3186065673828145, "median": 33.712440490722656, "p90": 63.36136856079102, "max": 101.17274475097656, "pos_frac": 0.953125, "sample": [65.38839721679688, 21.552032470703125, 11.884008407592773, 21.286453247070312, 26.67548370361328, 67.64089965820312, 50.613037109375, 24.247421264648438, 37.821231842041016, 20.462692260742188, 46.89109420776367, 57.52342224121094, 8.304183959960938, 26.33224868774414, 34.98974609375, -16.501113891601562, 2.51654052734375, 38.16802978515625, 42.86864471435547, 22.21167755126953, 18.79693603515625, 38.61785888671875, 39.83007049560547, 59.572303771972656, 58.73283386230469, 52.87725067138672, 62.478736877441406, 12.281494140625, 30.51443099975586, 50.821800231933594, 34.40215301513672, 17.011436462402344, 48.304290771484375, 51.473426818847656, 81.98455810546875, 63.73963928222656, -8.117897033691406, 33.09442901611328, 36.865936279296875, 54.47553634643555, 10.709630966186523, 42.47761535644531, 32.95332336425781, 101.17274475097656, 45.649688720703125, 61.48307800292969, 80.72802734375, 1.7581367492675781, 6.944231033325195, 8.057281494140625, 49.53044891357422, 19.87883758544922, -5.767326354980469, 19.504486083984375, 14.754646301269531, 1.9644699096679688, 24.77252960205078, 13.387237548828125, 34.33045196533203, 71.09063720703125, 1.812582015991211, 24.231094360351562, 37.62451934814453, 5.190093994140625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000609.npy"}
|
|
{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 32.680015563964844, "std": 25.850717544555664, "min": -22.056419372558594, "p10": 1.468102264404298, "median": 34.61331558227539, "p90": 67.08309173583984, "max": 92.38442993164062, "pos_frac": 0.921875, "sample": [24.71172523498535, 27.482147216796875, 38.3306884765625, 36.697391510009766, 8.186088562011719, 50.80116271972656, 13.73175048828125, 12.564987182617188, 25.644248962402344, 39.4141845703125, 0.9601974487304688, 58.85346984863281, 2.6532135009765625, 75.82504272460938, 89.6973876953125, 41.18206787109375, 32.690185546875, 31.919836044311523, -9.775291442871094, 15.997665405273438, 33.84028625488281, 35.38634490966797, 5.074302673339844, 43.96769714355469, 15.72113037109375, 19.77678680419922, -7.402858734130859, 5.03375244140625, 78.32209777832031, 92.38442993164062, 79.51089477539062, 6.416343688964844, 0.8397312164306641, 70.83443450927734, 46.887420654296875, 41.327911376953125, 35.63355255126953, 43.87162780761719, 41.644142150878906, 25.245555877685547, 42.76097106933594, 52.6475830078125, -2.361766815185547, 18.290367126464844, 67.19558715820312, 24.850425720214844, 57.88819885253906, 53.328922271728516, 2.7772789001464844, 48.954071044921875, 8.955825805664062, 66.82060241699219, 16.80496597290039, 3.7688636779785156, 40.95927429199219, 47.678550720214844, 54.35896301269531, 38.98234558105469, 10.604085922241211, 49.830718994140625, -22.056419372558594, -11.974624633789062, 62.248291015625, 26.324295043945312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000610.npy"}
|
|
{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 35.87771224975586, "std": 28.05087661743164, "min": -14.700069427490234, "p10": 2.023128890991211, "median": 32.880510330200195, "p90": 73.9546356201172, "max": 96.7728042602539, "pos_frac": 0.9375, "sample": [15.946197509765625, 39.29905700683594, 68.1657485961914, 69.05841064453125, 62.39240264892578, 33.12559509277344, 20.1534423828125, 35.36476135253906, 37.25591278076172, -1.5589218139648438, 31.870826721191406, 71.76959228515625, -14.700069427490234, 4.778102874755859, -6.546588897705078, 2.2825145721435547, 4.7021942138671875, 20.777084350585938, 47.780479431152344, 90.0774917602539, 3.413055419921875, 93.99890899658203, 67.01332092285156, 3.1324920654296875, 42.15671920776367, 27.161102294921875, 81.47016143798828, 74.89108276367188, 19.001426696777344, 52.68736267089844, 39.61650085449219, 22.532180786132812, 47.903038024902344, 10.066766738891602, 25.199668884277344, 22.44493865966797, 0.7492790222167969, 19.012636184692383, 3.200794219970703, 46.81817626953125, 76.61491394042969, 47.0758056640625, 50.81138610839844, 52.800132751464844, 43.64842224121094, 48.13402557373047, 14.523473739624023, 32.63542556762695, 60.92427444458008, 13.218017578125, 30.209247589111328, 1.3465805053710938, 63.82648468017578, 62.090301513671875, -0.9665908813476562, 19.991600036621094, 96.7728042602539, 25.102684020996094, 2.146221160888672, 68.79826354980469, 82.84332275390625, 1.9703750610351562, 53.847076416015625, 13.345630645751953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000611.npy"}
|
|
{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 40.946144104003906, "std": 27.59153175354004, "min": -15.923892974853516, "p10": 10.427021789550782, "median": 41.15257453918457, "p90": 80.33059387207031, "max": 104.48534393310547, "pos_frac": 0.9375, "sample": [63.21441650390625, 34.083492279052734, 46.66572189331055, 10.312210083007812, 22.543224334716797, 61.053123474121094, 58.99977111816406, 45.380035400390625, 10.00860595703125, -15.923892974853516, 24.73125457763672, 58.90525817871094, 41.96376037597656, 72.60275268554688, 50.779632568359375, 57.32579803466797, 104.48534393310547, 46.158302307128906, 56.245208740234375, 46.142478942871094, 32.982574462890625, 11.918994903564453, 16.902502059936523, 51.894744873046875, 79.79969787597656, 22.155166625976562, 29.16876220703125, 10.694915771484375, 65.19168090820312, 62.590576171875, 50.28966522216797, 21.6165771484375, 5.624481201171875, 12.23095703125, 80.86015319824219, 30.767719268798828, 33.00041198730469, 33.94642639160156, 68.95770263671875, 20.619449615478516, 18.901235580444336, 27.340007781982422, -2.22747802734375, 41.470375061035156, 85.41304016113281, 15.727333068847656, 96.36502075195312, 40.834774017333984, 92.68037414550781, 14.843620300292969, -5.676689147949219, 60.0617561340332, 80.55812072753906, 40.000404357910156, 11.726531982421875, 17.18000030517578, 63.9185791015625, 50.86015319824219, 16.940773010253906, 49.14671325683594, 103.86885070800781, 42.61309051513672, 23.77243995666504, -2.655670166015625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000612.npy"}
|
|
{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 35.77855682373047, "std": 27.456396102905273, "min": -12.120407104492188, "p10": 0.35380821228027437, "median": 37.126983642578125, "p90": 62.118386077880864, "max": 121.32936096191406, "pos_frac": 0.890625, "sample": [40.599365234375, 30.945098876953125, 37.78752136230469, 20.993541717529297, 34.12467956542969, 56.586761474609375, 28.161144256591797, 46.895355224609375, 44.50419616699219, 42.397186279296875, 42.842864990234375, -12.120407104492188, 56.36618423461914, -6.540153503417969, 121.32936096191406, 51.62477111816406, 41.971153259277344, 73.14768981933594, 41.92845153808594, 13.087738037109375, 63.09832000732422, 32.78126525878906, 61.792274475097656, 103.502197265625, 58.70806121826172, 32.43409729003906, 50.06599426269531, 73.51201629638672, 14.12701416015625, 24.728809356689453, 7.9625244140625, 23.016372680664062, 24.307634353637695, 24.934974670410156, 7.8839569091796875, 50.573638916015625, 18.356449127197266, 50.61512756347656, 44.98039245605469, -0.048313140869140625, 42.287940979003906, 51.046302795410156, 62.258148193359375, 22.608543395996094, -1.582305908203125, 43.33721160888672, 26.658660888671875, 119.79960632324219, 2.496488571166992, -6.5037841796875, 36.77714538574219, 38.612388610839844, 1.2920913696289062, 18.200702667236328, 50.3818359375, 52.95663070678711, 27.293289184570312, 12.121307373046875, -3.1679000854492188, 21.090744018554688, 6.6322784423828125, 60.384605407714844, 37.47682189941406, -6.5965728759765625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000613.npy"}
|
|
{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 35.7594108581543, "std": 25.567346572875977, "min": -9.156158447265625, "p10": 4.896209716796877, "median": 35.86566925048828, "p90": 67.44098129272462, "max": 98.75542449951172, "pos_frac": 0.953125, "sample": [18.411285400390625, 44.73944854736328, 16.345474243164062, 52.95458984375, 24.204025268554688, 47.126461029052734, 10.373355865478516, 57.83917236328125, 21.559934616088867, 3.1920204162597656, 46.79850769042969, 23.77657699584961, 46.073028564453125, 50.05914306640625, 49.902488708496094, -9.156158447265625, 47.7130126953125, 98.75542449951172, 28.883377075195312, 2.771251678466797, 91.30963134765625, 7.1116943359375, 40.264469146728516, 33.77952194213867, 35.414268493652344, 3.94671630859375, 40.50115966796875, 30.521377563476562, 20.375926971435547, 39.87433624267578, 30.923004150390625, 32.09058380126953, 30.00063705444336, 41.95222854614258, 15.672386169433594, 39.07218933105469, 10.792720794677734, 1.4810962677001953, 36.31707000732422, 52.34282684326172, 18.05907440185547, 9.066158294677734, 41.42561340332031, 44.45405578613281, 65.35164642333984, 90.22541809082031, -9.070411682128906, 31.946502685546875, 39.41743469238281, 7.8668212890625, 16.696815490722656, 10.452695846557617, 44.50128173828125, 41.56214904785156, 95.72602081298828, 68.33641052246094, 41.96112823486328, 16.613557815551758, 41.05585479736328, -1.6690597534179688, 95.87632751464844, 22.04730224609375, 52.227256774902344, 88.40596771240234], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000614.npy"}
|
|
{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 30.23415184020996, "std": 23.44518280029297, "min": -6.964441299438477, "p10": 3.257430267333985, "median": 24.836122512817383, "p90": 60.99833221435547, "max": 79.88117218017578, "pos_frac": 0.921875, "sample": [2.93682861328125, 67.80497741699219, 29.568342208862305, 7.6898651123046875, 19.222185134887695, 4.005500793457031, -0.1749420166015625, 5.3954315185546875, 24.865966796875, 15.762344360351562, 53.324554443359375, 47.24806213378906, 17.156898498535156, 48.78266143798828, 70.4590072631836, 16.667572021484375, 13.763092041015625, 58.804969787597656, 14.127769470214844, 7.225578308105469, 6.279659271240234, 2.315093994140625, 75.54811096191406, 6.678337097167969, 12.574586868286133, 61.110626220703125, 49.394134521484375, 67.55776977539062, 42.288021087646484, 7.568140029907227, 18.581539154052734, 50.19978332519531, 37.013702392578125, 7.6113128662109375, 23.657634735107422, 47.00914764404297, 54.06941223144531, 44.35154724121094, 23.311294555664062, 60.73631286621094, -6.837516784667969, 54.83122253417969, 12.531997680664062, -6.964441299438477, 30.309356689453125, 32.566856384277344, 35.21397399902344, 69.13798522949219, 20.82757568359375, 56.45296859741211, 51.275211334228516, 10.910150527954102, 37.791481018066406, -3.7553482055664062, 20.275123596191406, 4.33857536315918, 37.18581771850586, 34.25952911376953, 24.806278228759766, 79.88117218017578, -0.9369964599609375, 58.5274658203125, 52.00657272338867, 7.857940673828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000615.npy"}
|
|
{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 32.59221649169922, "std": 30.33946990966797, "min": -24.325332641601562, "p10": -0.2779418945312494, "median": 30.918426513671875, "p90": 65.9711311340332, "max": 144.134521484375, "pos_frac": 0.890625, "sample": [59.83470916748047, 6.570075988769531, 40.802215576171875, 50.339969635009766, 4.238800048828125, 31.421911239624023, 17.193817138671875, 14.212677001953125, 36.25322723388672, 48.936798095703125, 63.98441696166992, 43.75958251953125, 40.569862365722656, 21.601348876953125, 54.722145080566406, 27.238306045532227, 26.473663330078125, 39.16181182861328, 9.726951599121094, 14.532978057861328, 127.20916748046875, 61.387916564941406, 4.76885986328125, 16.454681396484375, -24.325332641601562, -12.834991455078125, 32.19288635253906, 8.617019653320312, 34.072540283203125, 40.46357727050781, 144.134521484375, 25.040441513061523, -1.0487594604492188, 28.733055114746094, 5.864665985107422, -5.702770233154297, 19.10291862487793, 32.565093994140625, 44.080665588378906, 15.037322998046875, 0.2877960205078125, 78.95707702636719, 15.464859008789062, 73.86991119384766, 31.15685272216797, 86.39889526367188, 32.09467315673828, 20.408279418945312, 29.561569213867188, 30.977493286132812, -0.5204010009765625, 39.76829147338867, 47.52561569213867, -11.095649719238281, 21.46892547607422, 30.859359741210938, 78.97148132324219, 48.67155075073242, 33.12681198120117, 29.807472229003906, 65.17900085449219, 7.7869415283203125, 66.31061553955078, -18.52423858642578], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000616.npy"}
|
|
{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 29.974029541015625, "std": 23.548133850097656, "min": -11.338577270507812, "p10": 2.3658327102661136, "median": 26.21994113922119, "p90": 62.1591293334961, "max": 100.15568542480469, "pos_frac": 0.953125, "sample": [23.216964721679688, 54.05759048461914, -0.203521728515625, 27.973983764648438, 11.450387954711914, 15.270614624023438, 59.853485107421875, 15.321849822998047, 52.99784851074219, 8.46235466003418, 19.84203338623047, 37.82453155517578, 10.001205444335938, 66.05968475341797, 23.205371856689453, 47.442298889160156, 2.7834911346435547, 31.507293701171875, 14.449562072753906, 100.15568542480469, 25.01390838623047, 12.102340698242188, 91.52320098876953, 53.95537567138672, 10.987834930419922, 34.33928680419922, 33.70446014404297, 14.721782684326172, 42.56318664550781, 70.98912048339844, 27.144350051879883, 28.072683334350586, 11.418563842773438, -7.786582946777344, 52.97537612915039, 57.57518005371094, 42.87220001220703, 37.44462585449219, 41.644439697265625, 41.863868713378906, 13.891918182373047, 26.26721954345703, 35.70759582519531, 60.95610046386719, 64.6641845703125, 49.62477111816406, 1.5422706604003906, -11.338577270507812, 25.0111083984375, 29.799034118652344, 5.591064453125, 1.90472412109375, 2.1868362426757812, 26.17266273498535, 8.952850341796875, 4.0944061279296875, 31.508155822753906, 19.50530242919922, 12.018938064575195, 13.805999755859375, 62.674713134765625, 23.0560302734375, 63.889312744140625, 2.0532760620117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000617.npy"}
|
|
{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 30.85983657836914, "std": 26.704391479492188, "min": -13.601818084716797, "p10": -1.3543628692626948, "median": 28.552696228027344, "p90": 68.87801971435547, "max": 105.69377136230469, "pos_frac": 0.875, "sample": [-6.058013916015625, 32.40919494628906, 9.960060119628906, 27.10334014892578, -12.676948547363281, 28.5887451171875, 20.211647033691406, 25.221942901611328, 105.69377136230469, 36.80863952636719, 59.09092712402344, 11.470781326293945, 39.43665313720703, 45.50349426269531, 28.516647338867188, 30.515167236328125, -1.9983596801757812, 71.40288543701172, 9.01861572265625, -1.605621337890625, -0.7680931091308594, 10.021907806396484, 17.759483337402344, 1.0865325927734375, 8.672119140625, 12.756080627441406, 32.65931701660156, 23.172821044921875, 67.18049621582031, 34.99720764160156, 12.416732788085938, 20.133779525756836, 61.363502502441406, 56.403236389160156, 62.386474609375, -13.601818084716797, -2.80682373046875, 29.83075714111328, -6.009527206420898, 32.84539794921875, 28.50353240966797, 6.944587707519531, 76.43429565429688, 2.0938720703125, 38.04081726074219, 3.819122314453125, 38.26806640625, 34.675323486328125, 44.5144157409668, 92.55335998535156, 33.84149932861328, 13.290355682373047, 87.49463653564453, 55.845184326171875, 56.66498565673828, 38.33293533325195, 10.405250549316406, 40.836090087890625, 69.60552978515625, 17.50067138671875, 21.275299072265625, 41.419891357421875, 81.18470764160156, 22.372108459472656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000618.npy"}
|
|
{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 32.923736572265625, "std": 30.157283782958984, "min": -12.581672668457031, "p10": -1.8260116577148435, "median": 28.75472640991211, "p90": 80.46464385986332, "max": 125.7515869140625, "pos_frac": 0.875, "sample": [-1.7108154296875, 12.819290161132812, 24.2701416015625, 30.86609649658203, 23.868751525878906, 0.7455902099609375, 34.00534439086914, 5.617645263671875, 53.04790496826172, 29.436735153198242, 36.20299530029297, 20.098541259765625, 24.238174438476562, 125.7515869140625, 31.029510498046875, 48.45252990722656, 95.42730712890625, 18.52753257751465, 72.56681823730469, 46.16326904296875, -8.28656005859375, -3.146059036254883, 50.83827209472656, 97.173095703125, 10.514671325683594, -4.099233627319336, 4.873512268066406, 37.33988952636719, 40.808921813964844, 28.383743286132812, -11.870441436767578, 62.194313049316406, 21.74181365966797, 10.142646789550781, 40.139556884765625, 25.945404052734375, 27.261781692504883, 11.404792785644531, 9.665494918823242, 30.250967025756836, 42.76976776123047, 83.84942626953125, 56.49980163574219, 93.7633056640625, 49.904052734375, 25.41185188293457, -12.581672668457031, 91.51716613769531, 68.72433471679688, 9.812667846679688, 3.14996337890625, 17.738475799560547, 32.19129180908203, 86.28274536132812, 2.29595947265625, 15.578004837036133, 54.80291748046875, 10.891979217529297, 57.125999450683594, 29.125709533691406, 51.67201232910156, -5.293182373046875, -1.8753814697265625, 31.060577392578125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000619.npy"}
|
|
{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 35.61638259887695, "std": 26.59665298461914, "min": 2.2162094116210938, "p10": 6.39809513092041, "median": 29.959335327148438, "p90": 70.06089172363282, "max": 121.81719970703125, "pos_frac": 1.0, "sample": [44.821449279785156, 6.044040679931641, 83.04816436767578, 26.135643005371094, 6.350318908691406, 63.233673095703125, 52.29413986206055, 16.979707717895508, 7.655342102050781, 30.41424560546875, 4.102485656738281, 49.61321258544922, 22.294342041015625, 52.16539764404297, 70.50459289550781, 30.3587646484375, 36.211273193359375, 26.065868377685547, 38.439727783203125, 38.59416198730469, 51.003074645996094, 31.050617218017578, 12.99053955078125, 42.659507751464844, 11.865676879882812, 28.002784729003906, 44.119110107421875, 4.868612289428711, 47.72694396972656, 2.40997314453125, 12.294120788574219, 74.57568359375, 29.240341186523438, 121.81719970703125, 42.77203369140625, 24.768966674804688, 37.84630584716797, 31.61957550048828, 93.427978515625, 48.70911407470703, 16.445158004760742, 39.10774230957031, 19.47323226928711, 25.364036560058594, 101.38639831542969, 23.098407745361328, 69.02558898925781, 56.85975646972656, 23.81298065185547, 29.528778076171875, 22.150257110595703, 29.559906005859375, 111.71920776367188, 6.535896301269531, 43.66508865356445, 49.969879150390625, 17.612060546875, 2.2162094116210938, 6.1179351806640625, 40.53919982910156, 6.509572982788086, 7.3531951904296875, 7.6077880859375, 24.695556640625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000620.npy"}
|
|
{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 36.26169967651367, "std": 26.917091369628906, "min": -21.76903533935547, "p10": 7.441926002502442, "median": 34.25612258911133, "p90": 61.58816223144532, "max": 142.17507934570312, "pos_frac": 0.9375, "sample": [86.70170593261719, 3.6133651733398438, -21.76903533935547, 47.630462646484375, 35.52952575683594, 26.127159118652344, 52.894752502441406, 40.00325012207031, -5.071048736572266, 89.95250701904297, 21.610641479492188, 25.463275909423828, 24.62964630126953, 63.313690185546875, 31.86626434326172, 21.709184646606445, -1.9336795806884766, 20.579946517944336, 46.76752471923828, 44.929168701171875, 142.17507934570312, 40.69120788574219, 61.695068359375, 26.437705993652344, 84.3603744506836, 54.10961151123047, 61.11151885986328, 21.026107788085938, 51.052154541015625, 39.92314529418945, 10.910537719726562, 36.346099853515625, 56.658172607421875, 7.068267822265625, 32.3255615234375, 57.512794494628906, 33.054256439208984, 30.44512939453125, 15.203468322753906, 21.2783203125, -6.232410430908203, 16.10131072998047, 15.314712524414062, 43.86509704589844, 39.365440368652344, 49.00177764892578, 29.62384033203125, 40.71138000488281, 20.54114532470703, 40.1878547668457, 5.9605865478515625, 8.31379508972168, 48.72071838378906, 61.338714599609375, 39.01081848144531, 15.980178833007812, 42.68040466308594, 30.000762939453125, 18.40258026123047, 30.256927490234375, 107.73095703125, 37.01824951171875, 13.463027954101562, 35.45798873901367], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000621.npy"}
|
|
{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 36.819705963134766, "std": 28.990467071533203, "min": -15.751197814941406, "p10": 3.4215026855468755, "median": 34.931846618652344, "p90": 77.7275375366211, "max": 113.98033142089844, "pos_frac": 0.953125, "sample": [66.98393249511719, 47.485877990722656, 31.031707763671875, 19.67298126220703, 47.66215515136719, 82.24354553222656, 7.440219879150391, 34.93947219848633, 93.336669921875, 53.15420150756836, 39.24702453613281, 52.12281036376953, 39.17639923095703, 12.557235717773438, 7.825366973876953, 77.48094177246094, 14.005943298339844, 36.01873016357422, 57.613677978515625, 6.7113800048828125, 25.13949203491211, 27.34575653076172, 12.662567138671875, 94.69647216796875, 14.284767150878906, 3.175994873046875, 57.606910705566406, 30.2847900390625, 57.77501678466797, 8.184814453125, 42.24540710449219, 28.453338623046875, 113.98033142089844, 1.651449203491211, 14.487268447875977, 71.6590576171875, -14.9085693359375, 40.599815368652344, 76.02951049804688, 34.92422103881836, 62.419281005859375, 37.31682586669922, 3.994354248046875, 4.041481018066406, 38.97657012939453, 0.41103363037109375, 20.262069702148438, -15.751197814941406, 7.5686492919921875, 2.4375648498535156, 77.83322143554688, 56.54090881347656, 84.10209655761719, 66.32803344726562, 44.38301086425781, 30.392536163330078, 24.965797424316406, -4.822196960449219, 10.802669525146484, 78.2374267578125, 66.59464263916016, 27.191722869873047, 22.541934967041016, 42.704063415527344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000622.npy"}
|
|
{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 30.37273597717285, "std": 29.304155349731445, "min": -18.77422332763672, "p10": -3.51177864074707, "median": 25.30834197998047, "p90": 71.72511444091798, "max": 130.27420043945312, "pos_frac": 0.859375, "sample": [25.28424072265625, 14.427078247070312, -3.691162109375, 57.16602325439453, 36.21363830566406, -3.361705780029297, 36.60448455810547, 82.53579711914062, 62.90675735473633, 24.29303741455078, 72.98480224609375, 16.947498321533203, 40.24976348876953, -3.5760955810546875, 48.17547607421875, 13.311424255371094, 72.3441162109375, 11.36566162109375, 11.59649658203125, 14.986846923828125, 10.40700912475586, 23.845401763916016, 32.63127136230469, 35.81629180908203, -16.092605590820312, 10.23480224609375, 70.28077697753906, 38.225032806396484, 14.076923370361328, 33.137046813964844, 37.0364990234375, -0.6039810180664062, -4.253700256347656, 13.671045303344727, -10.186285018920898, 4.28679084777832, 78.95442962646484, 27.36041259765625, 4.842866897583008, 6.190986633300781, 28.75933837890625, 42.910560607910156, 58.50844955444336, 66.89494323730469, 44.455780029296875, 6.386940002441406, 59.83589172363281, 55.261741638183594, 14.992931365966797, -18.77422332763672, 130.27420043945312, 90.93374633789062, 14.725845336914062, 4.913930892944336, 9.977806091308594, 56.289642333984375, -13.6041259765625, 25.332443237304688, 43.65578842163086, 36.38656234741211, 76.39826965332031, 41.89338684082031, 13.703544616699219, 13.116508483886719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000623.npy"}
|
|
{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 37.318939208984375, "std": 27.726625442504883, "min": -28.102996826171875, "p10": -2.399972534179687, "median": 40.670066833496094, "p90": 69.39462738037109, "max": 88.00047302246094, "pos_frac": 0.859375, "sample": [33.9625129699707, -12.729473114013672, 50.707000732421875, 49.39954376220703, 38.326507568359375, 72.31198120117188, 44.09589767456055, 57.36021423339844, 68.94091796875, 43.67516326904297, 40.14276885986328, 9.11285400390625, 40.23400115966797, 69.44570922851562, 53.48793029785156, 37.65605926513672, 67.3080825805664, 57.53053283691406, 86.1893310546875, 6.116874694824219, 32.75849914550781, 69.27543640136719, 40.565650939941406, 7.654733657836914, 40.77448272705078, 36.670860290527344, 76.74584197998047, 68.75069427490234, -8.509674072265625, 19.89960479736328, 43.22125244140625, 58.105926513671875, -8.332489013671875, 23.886905670166016, 26.95123291015625, 41.02735900878906, 53.66560745239258, 4.796464920043945, 84.10063171386719, 4.8179779052734375, 27.04460906982422, 36.310997009277344, 41.70359802246094, -28.102996826171875, 63.87467956542969, 42.68452453613281, 49.62202453613281, 35.64946746826172, 60.6923828125, 52.244834899902344, 31.867774963378906, -2.2061309814453125, -0.8276348114013672, 88.00047302246094, -15.431543350219727, 6.75567626953125, -2.4830474853515625, 6.536960601806641, -8.548412322998047, 45.17125701904297, 34.74646759033203, 54.3485107421875, 77.03594970703125, 61.620361328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000624.npy"}
|
|
{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 35.52130889892578, "std": 28.53171730041504, "min": -9.095909118652344, "p10": 3.0694746017456063, "median": 27.32398223876953, "p90": 71.45126724243164, "max": 110.394775390625, "pos_frac": 0.9375, "sample": [7.8975067138671875, 24.276288986206055, 52.564918518066406, 40.488548278808594, 27.442401885986328, 46.983062744140625, 58.61590576171875, 99.01031494140625, 27.032455444335938, 66.7718734741211, 22.2657470703125, -5.464752197265625, 20.03612518310547, 12.715896606445312, 71.37203979492188, 46.97895812988281, 1.832498550415039, 11.323246002197266, 110.394775390625, 37.79075622558594, 69.38873291015625, 22.124242782592773, 13.642772674560547, 2.649209976196289, 17.29582977294922, 38.843666076660156, 21.781814575195312, 21.24767303466797, 58.43788146972656, 15.976810455322266, 58.39543151855469, 32.32880401611328, 91.36449432373047, 40.60969161987305, 63.166709899902344, 43.11595153808594, 10.715164184570312, 54.49905776977539, 24.270709991455078, 6.202550888061523, -9.095909118652344, 23.494537353515625, 93.7112045288086, 19.0379638671875, 50.9141845703125, 56.874229431152344, 10.746660232543945, 49.296051025390625, 26.826675415039062, 31.253793716430664, 2.6960315704345703, -6.947479248046875, 31.276466369628906, 22.292640686035156, 27.205562591552734, 43.31919860839844, 3.9408416748046875, 32.61681365966797, 86.27721405029297, -8.569686889648438, 71.48522186279297, 6.340675354003906, 100.8428955078125, 21.142173767089844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000625.npy"}
|
|
{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 30.092287063598633, "std": 28.965452194213867, "min": -17.175506591796875, "p10": -2.8985300064086914, "median": 21.56238555908203, "p90": 71.35664901733398, "max": 102.23062133789062, "pos_frac": 0.859375, "sample": [25.749237060546875, 34.15519714355469, 11.355796813964844, 6.193412780761719, 13.48779296875, 41.255027770996094, 76.5006332397461, 11.507270812988281, 17.491668701171875, 59.988739013671875, 46.865478515625, 32.41884231567383, 71.32096862792969, 84.89370727539062, -2.6972503662109375, -5.906288146972656, 29.848487854003906, 10.54460334777832, 12.45233154296875, 10.407207489013672, 11.942626953125, 1.8229904174804688, 62.21336364746094, 11.507478713989258, -9.207490921020508, 23.326187133789062, 34.643035888671875, 28.343788146972656, 13.954875946044922, 71.37194061279297, 12.994577407836914, 61.985328674316406, 1.5012626647949219, 10.668720245361328, 56.15045928955078, 48.327606201171875, 53.21380615234375, 42.264488220214844, 34.210960388183594, 19.39423370361328, 87.47482299804688, 19.798583984375, 102.23062133789062, 101.01182556152344, 55.3834228515625, 25.32062530517578, -7.1520843505859375, 16.841400146484375, 23.74664306640625, 15.535888671875, -3.605499267578125, 56.97547912597656, 89.66121673583984, 53.61285400390625, 13.491714477539062, -0.3264007568359375, 14.142333984375, 59.87547302246094, -17.175506591796875, -5.2804718017578125, 5.5285491943359375, 11.823966979980469, 31.512466430664062, -2.984792709350586], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000626.npy"}
|
|
{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 33.95563507080078, "std": 34.09070587158203, "min": -31.861778259277344, "p10": -3.2723566055297852, "median": 27.479564666748047, "p90": 85.23593215942388, "max": 131.99078369140625, "pos_frac": 0.859375, "sample": [31.88971710205078, 54.09320068359375, 90.90275573730469, 55.48847961425781, 15.537643432617188, 4.841400146484375, 58.30286407470703, 37.5653076171875, 91.68428039550781, 28.451171875, 58.53666687011719, -7.760223388671875, 14.393753051757812, 11.766347885131836, -3.3083019256591797, 53.69029235839844, 8.950122833251953, 31.553863525390625, 48.13908386230469, 51.83453369140625, -16.08148193359375, 42.26862716674805, 68.28340148925781, 1.8164081573486328, 116.95584106445312, -9.87615966796875, 58.30473327636719, 92.69882202148438, 7.81243896484375, 4.513959884643555, 15.066612243652344, 8.132488250732422, 41.622955322265625, 131.99078369140625, 72.01334381103516, 6.772006988525391, 60.98954772949219, 19.955612182617188, 55.62751770019531, -6.315547943115234, 3.5513858795166016, 22.460302352905273, 105.90843200683594, -10.84722900390625, 17.664230346679688, 2.1293411254882812, 52.10856628417969, 25.35301971435547, -3.1884841918945312, 20.033493041992188, 65.90528869628906, 15.289459228515625, 30.588077545166016, -31.861778259277344, 13.131542205810547, 46.38909912109375, 25.782318115234375, 26.507957458496094, 10.656944274902344, 92.86451721191406, 53.88975524902344, -2.534271240234375, 48.42073059082031, 33.85295104980469], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000627.npy"}
|
|
{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 35.616455078125, "std": 26.15797996520996, "min": -8.909416198730469, "p10": 4.7138875961303714, "median": 31.757064819335938, "p90": 71.54060668945313, "max": 103.80914306640625, "pos_frac": 0.96875, "sample": [30.941862106323242, -0.5642166137695312, 40.24609375, 52.354248046875, 37.046875, 54.397735595703125, 16.326339721679688, 30.64502716064453, 12.636642456054688, 15.497865676879883, 17.00086212158203, 21.05644989013672, -8.909416198730469, 71.88336181640625, 31.310211181640625, 49.66505432128906, 9.360057830810547, 3.4176101684570312, 103.80914306640625, 52.228729248046875, 50.67218017578125, 40.17393493652344, 29.187423706054688, 48.524986267089844, 15.152107238769531, 1.6358489990234375, 37.67280578613281, 34.90984344482422, 16.58924102783203, 26.522083282470703, 70.7408447265625, 16.55638885498047, 33.48875427246094, 82.19281005859375, 45.975341796875, 69.01264953613281, 25.885709762573242, 32.20391845703125, 20.07436180114746, 10.402965545654297, 40.19450378417969, 3.9111671447753906, 4.451395034790039, 42.89335632324219, 83.3326187133789, 22.870010375976562, 65.28490447998047, 11.822341918945312, 10.652366638183594, 15.0218505859375, 51.965606689453125, 24.12053108215332, 97.2214126586914, 95.26270294189453, 50.890106201171875, 9.747259140014648, 58.725006103515625, 5.3263702392578125, 72.6528091430664, 35.047245025634766, 65.62857055664062, 13.848800659179688, 50.292205810546875, 0.3652381896972656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000628.npy"}
|
|
{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 31.81080436706543, "std": 25.537303924560547, "min": -28.626632690429688, "p10": 3.609826850891113, "median": 30.256628036499023, "p90": 59.40623092651368, "max": 136.9967041015625, "pos_frac": 0.96875, "sample": [57.06707763671875, 50.75973892211914, 5.26264762878418, 25.259824752807617, 72.65126037597656, 18.981060028076172, 19.759849548339844, 30.959213256835938, 25.08667755126953, 57.12663269042969, 24.396316528320312, 17.40904998779297, 53.32147979736328, 4.756374359130859, 36.805213928222656, 17.889915466308594, 43.701148986816406, 39.36275863647461, 18.0678653717041, 28.90430450439453, 3.6064376831054688, 75.30271911621094, 47.61888122558594, 0.013885498046875, 21.550525665283203, 46.514060974121094, 54.337791442871094, 3.693115234375, 49.673301696777344, 42.203125, 13.271678924560547, 0.34024810791015625, 136.9967041015625, 36.18597412109375, 33.878814697265625, 7.029073715209961, 9.252220153808594, 50.37799072265625, 54.34336853027344, 2.4570159912109375, 20.24401092529297, 22.353988647460938, 31.25470733642578, 38.43775939941406, 64.53350830078125, 27.31085968017578, 45.021026611328125, -4.165351867675781, 33.19647216796875, 3.617734909057617, 30.821582794189453, 29.691673278808594, 0.8497314453125, 60.61563491821289, 5.1747589111328125, 79.27455139160156, 39.16748809814453, 60.383201599121094, 27.94927978515625, 41.19959259033203, 45.047454833984375, 21.18646240234375, 5.1766815185546875, -28.626632690429688], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000629.npy"}
|
|
{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 30.402843475341797, "std": 25.56612205505371, "min": -21.038734436035156, "p10": 0.259550666809083, "median": 27.91111946105957, "p90": 70.356298828125, "max": 84.62535095214844, "pos_frac": 0.890625, "sample": [83.61354064941406, 3.094268798828125, 11.373544692993164, 20.712074279785156, -4.268272399902344, 71.33223724365234, 49.681365966796875, 76.07752990722656, 75.57696533203125, 37.65914535522461, 39.82575988769531, 28.2843017578125, 31.024169921875, 68.07910919189453, 68.06426239013672, 11.473831176757812, 7.315153121948242, -0.23346710205078125, 24.701072692871094, 44.23520278930664, -2.3570556640625, 14.984411239624023, 9.332351684570312, 59.90690994262695, -3.4862289428710938, 84.62535095214844, 25.068086624145508, 44.31446838378906, 13.880149841308594, 15.511016845703125, 1.212697982788086, 53.367820739746094, 79.303955078125, -9.22528076171875, 47.5634880065918, 31.259191513061523, 31.903194427490234, 48.81608200073242, 7.6261749267578125, 52.898895263671875, 14.975051879882812, 9.56167984008789, 9.367294311523438, 11.456245422363281, 41.21612548828125, 18.555389404296875, 37.5157470703125, 50.387306213378906, 27.53793716430664, 18.08233642578125, 34.503814697265625, 41.579017639160156, 15.216880798339844, 35.10906982421875, 3.7233543395996094, 34.2213134765625, 17.6268310546875, 25.17620849609375, 30.944469451904297, -21.038734436035156, 79.79464721679688, -0.1489410400390625, 53.341915130615234, 2.949522018432617], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000630.npy"}
|
|
{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 28.286867141723633, "std": 29.5904598236084, "min": -48.538818359375, "p10": -7.662013244628903, "median": 25.141874313354492, "p90": 75.69249572753907, "max": 102.47731018066406, "pos_frac": 0.859375, "sample": [79.5079345703125, 23.60882568359375, 26.455856323242188, 49.350460052490234, 33.92637634277344, 48.02635192871094, 50.397605895996094, 32.599525451660156, -11.085018157958984, 0.28619384765625, -14.083221435546875, 97.03376770019531, 10.4312744140625, 57.272743225097656, 25.079639434814453, -4.15826416015625, 25.20410919189453, 75.58438110351562, 30.872085571289062, 7.821126937866211, 10.344659805297852, 8.99131965637207, 44.29277801513672, -1.7889633178710938, 53.74300765991211, 40.6048469543457, 38.63631820678711, 19.339141845703125, 25.556900024414062, 5.220001220703125, -9.465194702148438, 40.735435485839844, 23.40447235107422, 27.135780334472656, 77.73748779296875, -18.04876708984375, 14.432518005371094, 8.758499145507812, 19.886411666870117, 60.11578369140625, 24.84186553955078, 9.027589797973633, 75.73883056640625, 77.31715393066406, 18.801128387451172, 4.953561782836914, 82.00590515136719, 7.6195831298828125, 40.383575439453125, 34.89599609375, 30.208816528320312, 6.624452590942383, 18.594844818115234, 18.31293487548828, -48.538818359375, -9.163619995117188, 52.467071533203125, 36.08349609375, 102.47731018066406, -27.303199768066406, 43.49285888671875, 23.79534912109375, 13.026741027832031, 40.931922912597656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000631.npy"}
|
|
{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 32.566551208496094, "std": 30.02157211303711, "min": -61.45722961425781, "p10": -5.9320505142211895, "median": 33.68462562561035, "p90": 63.4959457397461, "max": 107.74049377441406, "pos_frac": 0.84375, "sample": [5.770717620849609, -61.45722961425781, 44.09281921386719, 36.43202209472656, 59.81922149658203, -4.181800842285156, 55.60746765136719, 87.05409240722656, 71.00178527832031, 40.31483459472656, 67.39094543457031, 12.7945556640625, 55.54810333251953, 12.768402099609375, 39.16101837158203, 24.183734893798828, 59.428558349609375, 63.690582275390625, -1.1010704040527344, 18.99353790283203, 45.97681427001953, 33.128814697265625, 60.296714782714844, 55.92610168457031, -7.326103210449219, 33.33766174316406, 60.271514892578125, -20.009803771972656, 43.235328674316406, 27.31484031677246, 14.380035400390625, -24.216115951538086, 96.24607849121094, 25.594802856445312, -3.7843170166015625, 22.56186294555664, 48.68622589111328, 33.5135383605957, 36.423561096191406, 19.84521484375, 52.25818634033203, 11.075515747070312, -10.045654296875, 33.855712890625, 20.725067138671875, 47.64691162109375, 38.12037658691406, 54.29296112060547, 107.74049377441406, 78.76044464111328, 63.04179382324219, 41.2816162109375, -21.77935028076172, 18.257354736328125, 13.28265380859375, 47.074554443359375, -6.682157516479492, 42.24005889892578, 16.668148040771484, 21.57990264892578, 53.829010009765625, 29.016416549682617, 13.143564224243164, 30.160614013671875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000632.npy"}
|
|
{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 27.419578552246094, "std": 25.544029235839844, "min": -21.05096435546875, "p10": -2.2383945465087884, "median": 20.775885581970215, "p90": 60.08617172241212, "max": 86.0582275390625, "pos_frac": 0.84375, "sample": [14.827354431152344, 53.16407012939453, 14.597850799560547, 17.55841827392578, 49.41668701171875, 42.1947021484375, -1.0664520263671875, 1.3684310913085938, 71.34563446044922, 36.92454528808594, 21.294818878173828, 56.60929870605469, 53.27015686035156, -2.4582595825195312, 15.310897827148438, 1.1843318939208984, 39.11278533935547, 43.031166076660156, 17.654518127441406, 25.56622314453125, 14.276641845703125, 86.0582275390625, 52.40254211425781, -3.0625762939453125, 74.114501953125, 29.047622680664062, 20.2569522857666, 28.14850425720215, 1.62060546875, -4.118230819702148, 73.40013122558594, 32.464744567871094, 45.00126647949219, 13.391555786132812, -7.000951766967773, -1.7253761291503906, -0.9183502197265625, 8.318161010742188, -21.05096435546875, 30.014432907104492, 60.852081298828125, 47.12811279296875, 10.087974548339844, 3.6693572998046875, 57.070465087890625, 36.2669677734375, 39.416160583496094, 84.60083770751953, 58.299049377441406, 10.319282531738281, -9.2911376953125, 18.744064331054688, 8.902368545532227, 13.712089538574219, 12.400249481201172, 16.54925537109375, 47.89437484741211, 84.218505859375, -2.780355453491211, 23.83544921875, 15.768287658691406, 8.264541625976562, 33.618568420410156, 33.75981140136719], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000633.npy"}
|
|
{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 27.910812377929688, "std": 29.121278762817383, "min": -25.79925537109375, "p10": -5.474050903320312, "median": 23.10918426513672, "p90": 72.55616302490235, "max": 109.80221557617188, "pos_frac": 0.828125, "sample": [6.716697692871094, 34.63453674316406, 28.624040603637695, 86.64595031738281, 18.475122451782227, 6.974485397338867, 2.0223159790039062, 9.731193542480469, 25.5721435546875, 82.15158081054688, -22.161516189575195, 24.550201416015625, 6.939416885375977, 20.772979736328125, 81.9400634765625, 72.82679748535156, -9.903736114501953, 11.88140869140625, 39.786102294921875, 13.604766845703125, 21.668167114257812, -25.79925537109375, -1.0339508056640625, 24.665451049804688, -4.928169250488281, 12.05801773071289, -6.6683502197265625, 57.61133575439453, 48.914337158203125, 14.747447967529297, 71.9246826171875, 12.146903991699219, 25.68962860107422, -10.232551574707031, 75.59005737304688, 57.53461837768555, 109.80221557617188, 19.982250213623047, 43.44062423706055, -0.23295211791992188, 45.542842864990234, 2.7025909423828125, -1.3144607543945312, 11.108341217041016, 26.59693145751953, 6.47608757019043, 47.020355224609375, 16.92730712890625, 35.03330993652344, 48.36576843261719, 16.757688522338867, 59.97981262207031, 61.24298858642578, 54.199623107910156, 59.300018310546875, 26.235252380371094, 28.521156311035156, 83.49874877929688, -8.561813354492188, 28.121612548828125, -5.708000183105469, 12.981094360351562, 11.803003311157227, 30.796630859375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000634.npy"}
|
|
{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 28.63296127319336, "std": 22.558204650878906, "min": -14.66988754272461, "p10": 2.919272422790528, "median": 26.074861526489258, "p90": 61.150843811035166, "max": 96.20649719238281, "pos_frac": 0.921875, "sample": [11.648822784423828, 44.283912658691406, 62.09429931640625, 7.761322021484375, 55.042503356933594, 26.028507232666016, 13.255393981933594, 12.004024505615234, 22.773094177246094, 14.882745742797852, 33.35248565673828, 53.12498474121094, 30.645095825195312, -4.7563323974609375, 3.26904296875, 45.71333312988281, 14.836112976074219, 32.75372314453125, 69.59429931640625, 26.1212158203125, 33.09842300415039, 15.99981689453125, 29.95947265625, 64.77568054199219, 45.050907135009766, 23.278610229492188, 1.8299713134765625, 26.41895294189453, 33.45408630371094, 79.52925109863281, 5.36851692199707, 53.05641174316406, -14.66988754272461, 66.96409606933594, 22.5723876953125, 23.50212860107422, 39.15283966064453, 11.82962417602539, 3.2415695190429688, 38.97108459472656, 16.98453140258789, 67.58306121826172, 96.20649719238281, 35.88233947753906, 23.889907836914062, 15.003646850585938, 58.94944763183594, 7.376152038574219, -6.453514099121094, 16.59471893310547, 36.02496337890625, -1.4905242919921875, 48.81036376953125, 37.78521728515625, 26.432804107666016, 2.7811450958251953, 18.861446380615234, 18.659683227539062, 31.332611083984375, 43.913726806640625, 7.151294708251953, -0.76300048828125, 42.958343505859375, 10.222122192382812], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000635.npy"}
|
|
{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 33.00461196899414, "std": 24.0425968170166, "min": -11.745904922485352, "p10": -0.9414062499999987, "median": 31.804126739501953, "p90": 59.25036544799805, "max": 95.77597045898438, "pos_frac": 0.890625, "sample": [20.254152297973633, 23.6064453125, 25.477386474609375, 34.81450653076172, 23.558143615722656, 11.068256378173828, 76.44383239746094, -10.374595642089844, 53.362693786621094, -1.89617919921875, -6.101108551025391, 38.137718200683594, 31.310020446777344, 46.429725646972656, 26.696929931640625, 27.46356964111328, 22.675308227539062, 59.46404266357422, 49.21820068359375, 27.87531280517578, 56.2445068359375, 20.77753448486328, 16.576560974121094, 2.991607666015625, 52.825523376464844, 28.963165283203125, 48.121551513671875, 48.62912368774414, 34.305084228515625, 30.239028930664062, -5.907356262207031, 7.2775115966796875, 38.00883102416992, 32.29823303222656, 0.8671760559082031, 88.46080017089844, 51.0540771484375, 58.75178527832031, 47.74787902832031, -9.986419677734375, 42.8304443359375, 22.284879684448242, 49.046913146972656, 75.45477294921875, 22.9764404296875, 57.13844299316406, 52.04986572265625, 76.00164794921875, 5.781734466552734, -11.745904922485352, 45.53832244873047, -1.4910888671875, 42.053680419921875, 37.503318786621094, 42.78049850463867, 27.717357635498047, 59.53346252441406, 95.77597045898438, 33.903839111328125, 37.54780578613281, 0.3411865234375, 11.570919036865234, 30.162349700927734, 29.80768585205078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000636.npy"}
|
|
{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 32.936134338378906, "std": 25.70160675048828, "min": -12.292789459228516, "p10": 2.2160900115966813, "median": 26.831416130065918, "p90": 74.32971496582033, "max": 111.09378814697266, "pos_frac": 0.921875, "sample": [34.96148681640625, 19.51768684387207, -6.9582366943359375, 53.296546936035156, 34.01010513305664, 46.57585906982422, 51.03683090209961, -7.476285934448242, 79.42141723632812, 56.05097961425781, 44.12745666503906, 39.65337371826172, 16.249710083007812, 18.67737579345703, 43.150909423828125, 89.19114685058594, 35.5513916015625, 17.456100463867188, 64.68212890625, 41.137542724609375, 87.29385375976562, 23.639190673828125, 28.391098022460938, 75.54766845703125, 21.905075073242188, 36.24287414550781, 15.579025268554688, 35.029693603515625, 76.51876831054688, 8.62811279296875, 28.08526611328125, -12.292789459228516, 17.17559814453125, 0.7629547119140625, 18.80036163330078, -3.498350143432617, 19.643508911132812, 21.54937744140625, 43.76093673706055, 26.485502243041992, 71.48782348632812, 111.09378814697266, 10.077293395996094, 42.431640625, 26.07587432861328, 31.801345825195312, 47.87517547607422, 6.396598815917969, 51.638145446777344, 27.177330017089844, 44.81890869140625, -3.189065933227539, 23.329254150390625, 40.27099609375, 11.771480560302734, 10.142505645751953, 25.41611671447754, 83.79707336425781, 26.24708366394043, 24.632644653320312, 1.546640396118164, 25.271774291992188, 24.46282958984375, 3.778139114379883], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000637.npy"}
|
|
{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 37.08156967163086, "std": 28.503551483154297, "min": -15.512519836425781, "p10": 6.084112548828126, "median": 32.81105422973633, "p90": 80.96904754638673, "max": 109.7422103881836, "pos_frac": 0.96875, "sample": [95.30349731445312, 2.555999755859375, 60.992523193359375, 28.81000518798828, 47.272987365722656, 14.290504455566406, 19.61411476135254, 43.39173889160156, 33.912906646728516, 4.387811660766602, 43.17823028564453, 89.06094360351562, 15.985710144042969, 68.92433166503906, 49.849945068359375, 12.246761322021484, 5.666114807128906, 44.05127716064453, 45.00858688354492, 33.06557846069336, -15.512519836425781, 22.918914794921875, 34.27448272705078, 43.41777038574219, 32.5565299987793, 109.7422103881836, 5.433032989501953, 30.795978546142578, 96.89071655273438, 39.414405822753906, 48.87497329711914, 82.41258239746094, 29.088546752929688, 46.38945007324219, 71.28758239746094, 19.181304931640625, 33.10054016113281, 8.50374984741211, 14.995635986328125, 19.18112564086914, 12.046663284301758, 26.92249298095703, 75.65193939208984, 64.33731079101562, -12.461448669433594, 4.275897979736328, 77.60079956054688, 36.314430236816406, 11.822471618652344, 10.678787231445312, 22.443544387817383, 7.059440612792969, 56.99896240234375, 27.305519104003906, 45.1925048828125, 7.5322265625, 11.304222106933594, 50.669471740722656, 87.24671936035156, 100.78077697753906, 15.441848754882812, 30.407554626464844, 48.166465759277344, 24.965248107910156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000638.npy"}
|
|
{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 31.09198760986328, "std": 24.72230339050293, "min": -26.329757690429688, "p10": 3.054883193969727, "median": 25.991379737854004, "p90": 68.35945205688476, "max": 83.62120056152344, "pos_frac": 0.953125, "sample": [1.6311149597167969, 31.87770652770996, 43.951385498046875, 41.64418029785156, 50.706390380859375, 2.7230377197265625, 65.97892761230469, 51.592437744140625, 36.141502380371094, 6.771148681640625, 23.083633422851562, 3.8291893005371094, 31.437828063964844, -26.329757690429688, 11.716644287109375, 23.27667236328125, 29.845169067382812, 11.64788818359375, 53.8524169921875, 10.010047912597656, 69.12566375732422, 17.98564338684082, 68.42169189453125, 49.03114318847656, 68.21422576904297, 18.662029266357422, 23.942935943603516, 26.6175594329834, 38.44940948486328, 57.00837707519531, 58.66735076904297, 8.841911315917969, 25.433303833007812, 1.9938507080078125, 33.15629196166992, 17.55660629272461, 26.248315811157227, 71.31917572021484, 63.74443054199219, 25.39287567138672, 16.535568237304688, 25.613311767578125, 50.12902069091797, 35.439125061035156, 83.62120056152344, 11.119651794433594, 24.29828643798828, 25.73444366455078, 0.6403350830078125, 22.053085327148438, 11.012956619262695, 5.170928955078125, 27.144432067871094, 15.20223617553711, -23.55634307861328, -0.7341461181640625, 36.576629638671875, 11.097213745117188, 30.745895385742188, 76.19110107421875, 22.16364860534668, 78.57838439941406, 82.54571533203125, 47.36408996582031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000639.npy"}
|
|
{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 36.84351348876953, "std": 30.175405502319336, "min": -1.3929557800292969, "p10": 6.761939239501953, "median": 34.40303039550781, "p90": 72.008895111084, "max": 153.1376953125, "pos_frac": 0.9375, "sample": [47.95612335205078, 81.90977478027344, 2.629148483276367, 57.66035842895508, 15.810762405395508, 14.0784912109375, 65.50475311279297, 61.3502311706543, 12.133947372436523, 7.259910583496094, 39.87748718261719, 37.37828826904297, 9.875328063964844, 34.71880340576172, 7.614269256591797, 35.52915954589844, 61.23756408691406, 8.58563232421875, 11.113700866699219, -1.1587715148925781, 20.111923217773438, 45.08818817138672, 22.837181091308594, -0.07767868041992188, 19.98556137084961, -1.3929557800292969, 25.96862030029297, 46.46076965332031, 124.55047607421875, 75.11186218261719, 74.46682739257812, 61.14158630371094, 26.984317779541016, 48.08384704589844, 153.1376953125, 0.016366958618164062, -0.4691925048828125, 15.742271423339844, 10.720123291015625, 26.23968505859375, 22.57600212097168, 34.661712646484375, 43.11407470703125, 55.71232604980469, 23.023590087890625, 34.251556396484375, 34.55450439453125, 59.55604553222656, 12.451377868652344, 38.182899475097656, 87.92745971679688, 61.19221496582031, 8.136215209960938, 52.970001220703125, 7.347198486328125, 6.54852294921875, 49.81562805175781, 43.09800338745117, 25.960281372070312, 66.27371978759766, 37.46522521972656, 94.87759399414062, 28.4735107421875, 24.042686462402344], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000640.npy"}
|
|
{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 32.17498779296875, "std": 30.656888961791992, "min": -10.617446899414062, "p10": 7.408464431762695, "median": 25.50763702392578, "p90": 62.327046203613286, "max": 195.11923217773438, "pos_frac": 0.9375, "sample": [7.728004455566406, 8.115131378173828, 66.7528305053711, 76.02902221679688, 13.274429321289062, 41.87397766113281, 33.230194091796875, 38.368072509765625, 12.787300109863281, 7.626319885253906, 7.315097808837891, 36.32427215576172, -0.1465606689453125, 13.0794677734375, 195.11923217773438, 48.8530387878418, 12.372642517089844, 11.150169372558594, 23.382293701171875, 38.40348815917969, 59.27117919921875, 12.130645751953125, 12.04965591430664, 60.55516052246094, 20.617908477783203, 7.808563232421875, -10.617446899414062, 19.10645294189453, 22.251262664794922, 26.938949584960938, 16.075180053710938, 35.55699157714844, 10.510255813598633, -7.2738037109375, 52.90709686279297, 33.40968704223633, 89.30308532714844, 12.248733520507812, 9.988006591796875, 32.141990661621094, 27.502653121948242, -4.277868270874023, 18.099546432495117, 9.50494384765625, 21.603958129882812, 10.714637756347656, 11.761341094970703, 6.174446105957031, 50.65684509277344, 79.44496154785156, 75.40840911865234, 59.11225128173828, 54.668426513671875, 44.11943054199219, 45.47785186767578, 47.959754943847656, 44.11546325683594, 63.08642578125, 32.60981750488281, 24.076324462890625, 4.538307189941406, 38.87933349609375, 32.06773376464844, 55.27619934082031], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000641.npy"}
|
|
{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 35.434226989746094, "std": 30.842571258544922, "min": -24.064788818359375, "p10": 2.500834655761721, "median": 29.775531768798828, "p90": 76.91655731201172, "max": 122.2484130859375, "pos_frac": 0.90625, "sample": [45.0142936706543, 55.439849853515625, 38.53041076660156, 34.44300079345703, 23.31182098388672, 17.124526977539062, 92.46223449707031, -2.6075439453125, 5.145393371582031, 31.350997924804688, 5.291542053222656, 4.896820068359375, 14.594314575195312, 13.845329284667969, 28.20006561279297, 74.886962890625, 26.745269775390625, 77.00794982910156, -23.665771484375, 23.762554168701172, 8.422233581542969, 13.7821044921875, 49.62109375, 19.76105308532715, 38.62192916870117, 9.775650024414062, 59.603721618652344, 48.85674285888672, 26.558692932128906, 52.586761474609375, 39.402984619140625, 32.737098693847656, 94.77157592773438, 60.03837203979492, 1.4739837646484375, 92.8994140625, 57.48805236816406, 76.70330810546875, -24.064788818359375, 19.218524932861328, 70.54090881347656, 47.70570373535156, 20.171485900878906, 69.29179382324219, -11.57769775390625, 78.12492370605469, 49.868194580078125, 45.325340270996094, 14.49884033203125, 32.79826354980469, 122.2484130859375, 27.105438232421875, -0.635955810546875, 6.0097503662109375, 15.834104537963867, -6.671062469482422, 19.052566528320312, 98.31739807128906, 9.45477294921875, 52.6630859375, 23.4827880859375, 60.51044464111328, 8.307731628417969, 51.32490539550781], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000642.npy"}
|
|
{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 30.795534133911133, "std": 24.469932556152344, "min": -27.91172218322754, "p10": 4.888920593261719, "median": 29.54468536376953, "p90": 58.87992477416992, "max": 107.73214721679688, "pos_frac": 0.9375, "sample": [47.1573486328125, 49.45135498046875, 35.734153747558594, 51.39739990234375, 44.03271484375, 76.77938079833984, 29.077049255371094, 8.586891174316406, -27.91172218322754, 21.21733856201172, 4.848987579345703, 31.56866455078125, 15.80908203125, 28.140789031982422, -2.606037139892578, 67.64291381835938, 58.3485107421875, 43.424346923828125, 15.357208251953125, 59.10767364501953, 5.986656188964844, 17.579675674438477, 12.272857666015625, 9.205793380737305, 14.568645477294922, 18.138591766357422, 5.831211090087891, 35.10680389404297, 28.077415466308594, 24.37378692626953, -11.355354309082031, 22.645475387573242, -8.787490844726562, 68.67677307128906, 107.73214721679688, 80.11376953125, 20.652603149414062, 37.84357452392578, 20.59107780456543, 36.24298095703125, 20.662517547607422, 56.540679931640625, 81.19393920898438, 56.46917724609375, 18.415176391601562, 33.78301239013672, 3.7591819763183594, 39.86940002441406, 30.12036895751953, 14.658193588256836, 42.88218688964844, 50.82433319091797, 4.982097625732422, 6.3308868408203125, 30.01232147216797, 17.509916305541992, 35.522560119628906, 33.378623962402344, 44.60502624511719, 57.18550109863281, 43.76766586303711, 30.922164916992188, 3.3776016235351562, 11.480583190917969], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000643.npy"}
|
|
{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 30.231281280517578, "std": 28.853412628173828, "min": -25.398818969726562, "p10": -0.28005638122558574, "median": 23.710854530334473, "p90": 73.54161376953127, "max": 92.43980407714844, "pos_frac": 0.875, "sample": [65.24185180664062, 16.606477737426758, 92.43980407714844, -25.398818969726562, -13.304763793945312, 32.81035614013672, -15.126541137695312, 22.140518188476562, 19.814491271972656, 17.89097023010254, 12.899551391601562, 59.836769104003906, -11.003150939941406, 35.81093978881836, 21.944679260253906, 90.63676452636719, 12.253738403320312, 23.631290435791016, 17.89532470703125, 32.00379943847656, 39.02496337890625, 55.03802490234375, 23.79041862487793, 11.64678955078125, 68.695068359375, 30.78814697265625, -5.877647399902344, 26.353029251098633, 37.99170684814453, 86.03132629394531, 11.105325698852539, 5.085262298583984, 51.88081359863281, 2.6864395141601562, 41.307159423828125, 4.320219039916992, 51.86424255371094, 26.118988037109375, 13.40267562866211, 30.207794189453125, 22.88182830810547, 17.512939453125, 61.236663818359375, 4.149343490600586, 74.6280517578125, 71.006591796875, 33.18511199951172, -0.3515281677246094, 9.034963607788086, 69.26277160644531, 53.99907684326172, 25.362258911132812, -4.808513641357422, 16.2457275390625, 35.029754638671875, 83.60218811035156, 37.90618896484375, 1.1440696716308594, 19.945510864257812, -0.11328887939453125, 0.05048942565917969, 86.66746520996094, 11.933748245239258, 84.8057861328125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000644.npy"}
|
|
{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 28.9981746673584, "std": 28.357072830200195, "min": -17.012168884277344, "p10": -0.7628366470336914, "median": 25.833885192871094, "p90": 66.13080673217776, "max": 104.31535339355469, "pos_frac": 0.859375, "sample": [19.944427490234375, 43.01866149902344, 1.093048095703125, 26.764816284179688, 2.086517333984375, 4.8134613037109375, 44.01649475097656, 33.03233337402344, 51.73796463012695, 4.443883895874023, 21.173015594482422, 25.181961059570312, 32.04528045654297, -5.722568511962891, 19.128292083740234, 21.8900146484375, 6.56744384765625, 30.72770881652832, 104.31535339355469, 13.524076461791992, 27.24357032775879, 29.897483825683594, 53.915367126464844, 29.91211700439453, 19.961769104003906, 43.5105094909668, 18.871841430664062, 5.790863037109375, -17.012168884277344, -5.1527252197265625, -0.7767314910888672, 0.3074302673339844, -0.7304153442382812, 27.4237060546875, 99.97713470458984, 12.025299072265625, 45.408203125, 68.18850708007812, 13.475635528564453, 10.194549560546875, 103.62504577636719, 36.891265869140625, -4.222450256347656, 57.05281448364258, -0.6235504150390625, 73.43911743164062, 50.04789733886719, 56.60093688964844, 28.80426788330078, 24.149391174316406, 39.958770751953125, 2.5384674072265625, 61.329505920410156, 10.6619873046875, 77.20916748046875, 95.571044921875, 26.485809326171875, 39.27093505859375, -11.2691650390625, 49.14218521118164, 24.604888916015625, 5.297657012939453, 35.36286544799805, -8.25973129272461], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000645.npy"}
|
|
{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 33.072837829589844, "std": 22.5963191986084, "min": -12.495393753051758, "p10": 8.701551055908203, "median": 33.394636154174805, "p90": 62.8078582763672, "max": 91.25164031982422, "pos_frac": 0.953125, "sample": [91.25164031982422, 8.597389221191406, 51.105812072753906, 59.185943603515625, 28.235763549804688, 15.192554473876953, 64.360107421875, 48.4322509765625, 89.23440551757812, 8.944595336914062, 34.734100341796875, 45.70033264160156, 14.641448974609375, 11.703155517578125, 34.877479553222656, 52.893280029296875, 43.19905471801758, 13.426483154296875, 55.193275451660156, 36.45958709716797, 44.75624084472656, 44.9754638671875, 9.252891540527344, 28.162073135375977, -7.789737701416016, 57.667884826660156, 54.83439636230469, 51.426170349121094, 1.9407901763916016, 32.58181381225586, 21.727619171142578, 36.849143981933594, 21.904144287109375, 20.394363403320312, 19.47875213623047, 43.98014831542969, 14.77978515625, 34.35588073730469, 39.564369201660156, 7.670688629150391, 22.882171630859375, 70.58526611328125, 22.125885009765625, -9.081493377685547, 22.142852783203125, 13.788238525390625, 42.43586730957031, 13.570343017578125, 34.20745849609375, 74.76348876953125, 54.122161865234375, 20.067115783691406, 27.413394927978516, 4.3837127685546875, 67.71541595458984, 20.818950653076172, 40.579437255859375, 10.530906677246094, 37.35343551635742, 24.321136474609375, -12.495393753051758, 71.87098693847656, 19.434814453125, 37.24394989013672], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000646.npy"}
|
|
{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 32.761592864990234, "std": 27.287033081054688, "min": -16.99725341796875, "p10": -4.631001281738279, "median": 28.941256523132324, "p90": 70.33505477905274, "max": 95.20999145507812, "pos_frac": 0.875, "sample": [49.06217956542969, 52.08861541748047, 40.0816650390625, 45.703338623046875, 46.42595672607422, 44.186161041259766, 61.4400634765625, 54.377593994140625, 35.94007110595703, 15.982986450195312, 63.404380798339844, 25.532241821289062, 7.0754241943359375, 28.374366760253906, 95.20999145507812, 45.381385803222656, 15.015396118164062, 89.14452362060547, 43.91541290283203, 33.63813781738281, 15.7730712890625, 3.758880615234375, 28.98274803161621, 8.891036987304688, 52.84465026855469, 78.8379135131836, -2.9851531982421875, 37.412574768066406, -16.54755401611328, 71.2988510131836, 38.051048278808594, 5.740093231201172, 20.88709259033203, 41.534446716308594, 68.1891098022461, 24.779632568359375, 26.694442749023438, 39.55451965332031, -5.33636474609375, -16.99725341796875, 20.410789489746094, 0.9617843627929688, 87.26898956298828, 3.575469970703125, -14.076923370361328, 25.284841537475586, 51.63945770263672, 28.899765014648438, 22.89719009399414, 66.6187744140625, 13.941638946533203, 26.08111000061035, 71.25474548339844, 61.627586364746094, 84.17713928222656, 15.239944458007812, -7.992607116699219, 24.279464721679688, -12.003795623779297, 28.227462768554688, 30.608901977539062, 34.250518798828125, -5.357387542724609, 25.583433151245117], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000647.npy"}
|
|
{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 40.340911865234375, "std": 34.430606842041016, "min": -10.056533813476562, "p10": 1.6166843414306662, "median": 36.866031646728516, "p90": 83.13314590454102, "max": 146.689453125, "pos_frac": 0.921875, "sample": [28.312332153320312, 0.6949691772460938, 19.258987426757812, 81.03878021240234, 12.219524383544922, 61.309120178222656, 114.3490219116211, 146.689453125, 47.91321563720703, 43.05680847167969, 14.908985137939453, 26.87449073791504, 25.967681884765625, 55.706626892089844, 12.851776123046875, 6.4312591552734375, 48.33213806152344, 53.47084045410156, 56.90058898925781, -2.1821060180664062, -4.195047378540039, 41.58910369873047, 23.87213706970215, 28.465835571289062, 37.99397277832031, 52.910400390625, 63.24946594238281, 38.151329040527344, 34.57927703857422, 30.460552215576172, 66.68695068359375, -10.056533813476562, 35.73809051513672, 108.95175170898438, 10.463333129882812, -3.909027099609375, 17.451335906982422, 40.98526382446289, 68.58953857421875, 0.7718505859375, 22.0535888671875, 71.08155059814453, 69.63703918457031, 13.766983032226562, 9.281700134277344, 93.62628173828125, 132.2804412841797, 10.093536376953125, 10.4326171875, 22.630464553833008, 66.05247497558594, 51.02910614013672, 6.2292633056640625, 6.162143707275391, 88.75128173828125, 3.587963104248047, 70.5928955078125, 63.01202392578125, 7.88017463684082, 41.63531494140625, 40.52414321899414, 68.58592224121094, -7.993316650390625, 84.03073120117188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000648.npy"}
|
|
{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 28.49066925048828, "std": 28.495798110961914, "min": -16.786928176879883, "p10": -0.9296592712402343, "median": 19.965651512145996, "p90": 69.19300918579101, "max": 118.7969970703125, "pos_frac": 0.875, "sample": [10.917362213134766, 44.05290985107422, 46.377708435058594, -0.9684123992919922, 68.15569305419922, 23.375686645507812, 10.733016967773438, 25.962631225585938, 17.149505615234375, 18.08948516845703, 77.10577392578125, -4.829071044921875, 14.299549102783203, 10.485214233398438, 20.059425354003906, -4.500335693359375, 8.48370361328125, 66.1181640625, -6.6219329833984375, 50.353492736816406, 14.944435119628906, 1.9214649200439453, -16.786928176879883, 1.7001094818115234, 19.871877670288086, -0.8392353057861328, 65.88630676269531, 17.324493408203125, 4.493343353271484, 44.331298828125, 59.84994888305664, 4.8662261962890625, -1.2538528442382812, 13.897369384765625, 78.29812622070312, 36.18733215332031, 55.062469482421875, 46.05284881591797, 35.5467529296875, 51.087615966796875, 72.36886596679688, 24.83319664001465, 3.2973194122314453, 94.81436157226562, 27.99329376220703, 3.9409751892089844, 54.71417236328125, 118.7969970703125, 28.08935546875, 7.002372741699219, 2.4626941680908203, 20.921707153320312, 21.470993041992188, 69.6375732421875, 15.483322143554688, 83.54179382324219, 55.057586669921875, -4.971149444580078, 5.497959136962891, 13.658601760864258, 8.058914184570312, 18.5203857421875, 21.989646911621094, 28.980270385742188], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000649.npy"}
|
|
{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 36.623985290527344, "std": 29.48505973815918, "min": -12.738967895507812, "p10": 2.9937297821044924, "median": 32.330135345458984, "p90": 77.70841522216797, "max": 113.77725219726562, "pos_frac": 0.953125, "sample": [-10.362449645996094, 2.8108139038085938, 42.334938049316406, 82.5655517578125, 2.7720718383789062, 71.33522033691406, 44.33701705932617, 18.459392547607422, 70.05760955810547, 26.75206184387207, 43.80570983886719, -4.8383331298828125, 101.5274429321289, 32.11158752441406, 74.76170349121094, 35.594940185546875, 42.41120147705078, 72.61518859863281, 33.281917572021484, 81.59378051757812, 78.6275634765625, 33.631103515625, 71.1939697265625, 56.4420166015625, 19.451669692993164, 21.87466049194336, 2.70513916015625, 9.971153259277344, 17.381561279296875, 17.28490447998047, 80.46371459960938, 41.27776336669922, -12.738967895507812, 22.260787963867188, 14.892107009887695, 113.77725219726562, 10.965997695922852, 113.67018127441406, 2.9330406188964844, 32.96625518798828, 10.005876541137695, 38.33753204345703, 3.1353378295898438, 50.37992858886719, 32.548683166503906, 58.768829345703125, 41.173561096191406, 22.22917938232422, 24.821807861328125, 75.56373596191406, 52.27423095703125, 23.88433837890625, 52.99449157714844, 58.31261444091797, 13.10427474975586, 14.326005935668945, 11.680694580078125, 5.917926788330078, 7.30828857421875, 26.339813232421875, 44.37939453125, 9.586570739746094, 23.936588287353516, 29.965980529785156], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000650.npy"}
|
|
{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 35.643516540527344, "std": 26.986141204833984, "min": -6.048469543457031, "p10": 4.484818077087402, "median": 28.83005142211914, "p90": 80.8577133178711, "max": 94.82373046875, "pos_frac": 0.953125, "sample": [94.82373046875, 61.201332092285156, 22.871139526367188, 85.4984130859375, 23.196273803710938, 8.410514831542969, 29.92010498046875, 68.74010467529297, 32.42186737060547, 20.603302001953125, 26.53234100341797, 57.124908447265625, 10.511323928833008, 26.05823516845703, 42.730224609375, 38.19728088378906, 11.80642318725586, 15.483100891113281, 27.383073806762695, 55.98246765136719, 23.06855010986328, -6.048469543457031, 65.1690902709961, 49.835777282714844, 81.16461181640625, 15.539802551269531, 7.480524063110352, -2.3121414184570312, 30.21392822265625, 17.360305786132812, 33.487510681152344, 4.4465484619140625, 22.453285217285156, -2.368040084838867, 74.70315551757812, 23.301799774169922, 61.84797668457031, 40.53917694091797, 72.1787109375, 4.574113845825195, 38.55926513671875, 47.72148513793945, 2.1356868743896484, 11.625722885131836, 0.30608367919921875, 80.14161682128906, 21.940105438232422, 84.61639404296875, 2.259387969970703, 37.07838439941406, 8.447412490844727, 17.47343635559082, 87.02448272705078, 42.28952407836914, 55.705467224121094, 16.700504302978516, 44.17849349975586, 15.145530700683594, 82.31680297851562, 32.592987060546875, 27.73999786376953, 42.83937454223633, 90.03146362304688, 16.18314552307129], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000651.npy"}
|
|
{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 35.44752502441406, "std": 28.201440811157227, "min": -4.27130126953125, "p10": 3.2466537475585966, "median": 27.82210922241211, "p90": 75.14475708007812, "max": 107.83541870117188, "pos_frac": 0.9375, "sample": [11.198535919189453, 17.693695068359375, 60.507843017578125, 10.732398986816406, -0.7998466491699219, 46.638004302978516, 39.158843994140625, 18.255889892578125, 70.30517578125, 39.207557678222656, 82.05863189697266, 61.66028594970703, 87.32545471191406, 11.959671020507812, 0.521453857421875, 17.00870132446289, 7.78155517578125, 75.30938720703125, 94.23724365234375, 1.7922916412353516, 13.535964965820312, -2.298971176147461, 28.149246215820312, 24.810806274414062, 46.35425567626953, -4.27130126953125, 27.494972229003906, 18.849273681640625, 16.674909591674805, 16.17479705810547, 26.118488311767578, 107.83541870117188, 19.81201934814453, 86.12834930419922, 42.20085906982422, 46.186492919921875, 32.98316955566406, 18.729339599609375, -2.5036678314208984, 9.730377197265625, 10.246780395507812, 25.8106689453125, 8.933338165283203, 13.836368560791016, 67.68098449707031, 56.90118408203125, 62.47344970703125, 10.003105163574219, 60.09941101074219, 21.224899291992188, 74.34646606445312, 11.42098617553711, 89.96795654296875, 40.92372131347656, 32.30295181274414, 32.66666793823242, 74.7606201171875, 2.0553741455078125, 49.54877853393555, 34.3770751953125, 52.45616149902344, 39.38974380493164, 65.94100952148438, 6.02630615234375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000652.npy"}
|
|
{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 35.66375732421875, "std": 25.90984344482422, "min": -11.282958984375, "p10": 6.123506927490235, "median": 35.06632423400879, "p90": 75.8134521484375, "max": 112.09405517578125, "pos_frac": 0.9375, "sample": [73.79341125488281, 7.85321044921875, 5.118940353393555, 88.32197570800781, 52.53143310546875, 15.12445068359375, 16.97021484375, -11.282958984375, -6.360908508300781, 34.251953125, 88.68368530273438, 6.843223571777344, -10.267250061035156, 66.31001281738281, 45.812660217285156, 26.891448974609375, 56.655120849609375, 41.89256286621094, 57.903594970703125, 58.23634338378906, -2.318845748901367, 35.1845588684082, 39.9277229309082, 29.1947021484375, 46.06732940673828, 24.092056274414062, 46.48444366455078, 44.008567810058594, 21.48150634765625, 40.64929962158203, 84.57237243652344, 29.805801391601562, 77.69493865966797, 20.434097290039062, 34.948089599609375, 36.89263916015625, 5.870567321777344, 43.691688537597656, 40.936363220214844, 45.77460479736328, 112.09405517578125, 26.277130126953125, 1.3166389465332031, 10.499837875366211, 84.16433715820312, 42.71320343017578, 10.691146850585938, 32.368011474609375, 35.44813537597656, 16.58573341369629, 27.161209106445312, 20.19108009338379, 76.67918395996094, 38.45478820800781, 27.233579635620117, 37.69002914428711, 6.7136993408203125, 11.789710998535156, 47.57135009765625, 41.482810974121094, 55.34442901611328, 15.058586120605469, 21.483795166015625, 22.792465209960938], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000653.npy"}
|
|
{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 29.483509063720703, "std": 28.891340255737305, "min": -21.131942749023438, "p10": -2.0206186294555653, "median": 24.253183364868164, "p90": 64.92828521728516, "max": 145.5150146484375, "pos_frac": 0.859375, "sample": [6.778800964355469, 81.03443908691406, 15.702713012695312, 25.61902618408203, 23.289749145507812, 15.739112854003906, -14.310951232910156, 17.867889404296875, 10.086727142333984, 63.82734680175781, 41.0898323059082, 33.226539611816406, 25.65684700012207, 25.586776733398438, 27.495349884033203, -11.090950012207031, 22.604408264160156, 30.352508544921875, 33.482669830322266, 49.02809524536133, 32.201873779296875, 145.5150146484375, 19.720138549804688, 17.696258544921875, 19.09040069580078, 53.63215637207031, -0.6386260986328125, 21.058704376220703, 21.660118103027344, -0.978668212890625, 20.62261199951172, 45.67593002319336, 56.49286651611328, 49.08349609375, 18.688262939453125, 20.018043518066406, 47.340911865234375, -2.709320068359375, 1.9641437530517578, 65.40011596679688, 59.55149841308594, 25.216617584228516, 43.230987548828125, 8.852499008178711, -11.653125762939453, 69.26644134521484, 27.743133544921875, 65.69364929199219, 81.93960571289062, 12.92569351196289, 34.735260009765625, 50.895355224609375, 35.508575439453125, 19.096702575683594, 3.6739578247070312, -2.4671688079833984, 44.722267150878906, -6.694028854370117, 106.62451171875, 26.594345092773438, 20.434356689453125, 13.0863037109375, -21.131942749023438, 4.49761962890625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000654.npy"}
|
|
{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 30.53487205505371, "std": 29.86149024963379, "min": -13.743030548095703, "p10": -0.8515663146972655, "median": 25.702972412109375, "p90": 74.79225845336914, "max": 125.78433227539062, "pos_frac": 0.875, "sample": [25.785194396972656, 21.29440689086914, 81.71101379394531, 27.568376541137695, -2.7770862579345703, 87.37763977050781, -0.9015655517578125, 68.20458984375, 3.7334518432617188, 14.107681274414062, 38.00923156738281, 3.710874557495117, 10.627670288085938, -13.743030548095703, 26.25397300720215, 10.189308166503906, 23.04004669189453, 125.78433227539062, 10.839729309082031, 47.9996337890625, 31.584991455078125, 50.52185821533203, 92.79867553710938, 50.10456085205078, 1.8926620483398438, 15.490470886230469, 48.263938903808594, 43.112701416015625, -13.435806274414062, 13.564453125, -2.545602798461914, 75.52206420898438, 41.242431640625, -0.7349014282226562, 47.687835693359375, 36.0679931640625, 5.595478057861328, 35.98677062988281, 48.07440185546875, 33.45543670654297, 3.109455108642578, 20.084747314453125, 31.76105499267578, 33.433815002441406, 80.18943786621094, 8.16229248046875, 25.620750427246094, 0.9679718017578125, 43.94892120361328, 33.83795166015625, -3.3311614990234375, 7.3656463623046875, 27.873291015625, 22.231693267822266, 73.0893783569336, -5.726711273193359, 109.90081787109375, 3.772216796875, 10.376937866210938, 65.69779968261719, 46.797325134277344, 21.900543212890625, 12.012981414794922, 18.088760375976562], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000655.npy"}
|
|
{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 41.93996047973633, "std": 26.170562744140625, "min": -9.511810302734375, "p10": 6.463903427124024, "median": 39.751220703125, "p90": 70.3494758605957, "max": 120.03076171875, "pos_frac": 0.953125, "sample": [81.0235824584961, 39.28685760498047, 28.42902374267578, -1.2963066101074219, 16.93561553955078, 66.76351165771484, 7.260837554931641, 55.750946044921875, 37.47069549560547, 66.63402557373047, 6.1223602294921875, 42.05564880371094, 54.85917663574219, 54.672149658203125, 18.297983169555664, 51.34449005126953, 11.168228149414062, 25.156784057617188, 19.727188110351562, 34.08224868774414, 36.47289276123047, 64.52606201171875, 59.043922424316406, 3.102874755859375, -9.511810302734375, 28.31359100341797, 49.66932678222656, 63.20996856689453, 43.16044616699219, 96.90225219726562, 31.854869842529297, 66.12255859375, 62.558860778808594, -1.0611133575439453, 68.70006561279297, 52.83412170410156, 61.987815856933594, 51.1304931640625, 62.193695068359375, 0.46484375, 87.77552032470703, 51.16645812988281, 72.48473358154297, 71.05636596679688, 8.724355697631836, 81.70339965820312, 39.19808578491211, 120.03076171875, 33.39909362792969, 56.159461975097656, 36.82001495361328, 36.00170135498047, 20.782899856567383, 5.8512115478515625, 39.42543029785156, 43.41869354248047, 17.1392822265625, 23.68358612060547, 64.02680206298828, 11.367225646972656, 40.07701110839844, 38.305686950683594, 22.296600341796875, 55.842315673828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000656.npy"}
|
|
{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 37.42141342163086, "std": 28.833955764770508, "min": -21.648757934570312, "p10": 5.915207290649415, "median": 34.29959487915039, "p90": 77.25778350830079, "max": 121.79554748535156, "pos_frac": 0.96875, "sample": [24.450439453125, 2.140716552734375, 18.760421752929688, 52.613922119140625, 78.18572998046875, 121.05000305175781, 38.99640655517578, 25.108612060546875, 56.10050964355469, 7.451530456542969, -18.421417236328125, 29.11456298828125, 24.189056396484375, 17.573776245117188, 34.676239013671875, 48.78587341308594, 2.5128631591796875, 49.88313293457031, 33.922950744628906, 75.09257507324219, 13.058914184570312, 54.89030838012695, 40.106666564941406, 90.96466064453125, 29.666358947753906, 78.67512512207031, 20.15482521057129, 48.154869079589844, 15.072181701660156, 5.4716033935546875, 6.950283050537109, 33.343875885009766, 5.414882659912109, 10.772331237792969, 32.39950942993164, 55.79225158691406, 48.50941467285156, 17.41461944580078, 24.245574951171875, 10.970312118530273, 43.73652648925781, 53.418731689453125, 3.7670116424560547, 46.952552795410156, 90.26817321777344, 18.75218391418457, 48.45904541015625, 51.375396728515625, 35.88007354736328, 29.72454833984375, 98.24415588378906, 52.382781982421875, 37.014556884765625, 69.51841735839844, 29.175338745117188, 11.048858642578125, 54.286231994628906, 121.79554748535156, 44.071075439453125, -21.648757934570312, 37.11573028564453, 11.34184455871582, 24.16252899169922, 39.91143798828125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000657.npy"}
|
|
{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 35.11966323852539, "std": 30.29734992980957, "min": -23.62458038330078, "p10": 1.6117561340332032, "median": 30.515384674072266, "p90": 72.28136901855471, "max": 136.17062377929688, "pos_frac": 0.921875, "sample": [29.41375732421875, 6.457366943359375, 11.306793212890625, -12.360847473144531, 1.0215644836425781, 38.93533706665039, 61.34008026123047, 26.94791030883789, 30.706344604492188, 58.77239990234375, 13.844554901123047, 136.17062377929688, 30.324424743652344, 74.58370971679688, 26.413116455078125, 55.248077392578125, 17.98172378540039, 32.57819366455078, 9.004379272460938, -6.214962005615234, 15.3870849609375, 13.325447082519531, 43.72621154785156, 34.459197998046875, 18.16510772705078, 31.258224487304688, 66.90924072265625, 44.340545654296875, 81.01150512695312, -23.62458038330078, 1.6296844482421875, 17.632535934448242, 21.95378875732422, 74.78443908691406, 13.77569580078125, 100.36863708496094, 6.432914733886719, 83.94374084472656, 17.73760223388672, 10.673826217651367, 51.03964614868164, 45.659400939941406, 63.5601806640625, 56.06007385253906, 51.35646057128906, 18.314224243164062, 47.828460693359375, 1.6040725708007812, 59.164886474609375, 57.45649719238281, 35.24303436279297, 40.2652473449707, -8.63694953918457, 57.469505310058594, 25.242870330810547, 46.60869598388672, 64.83526611328125, 109.90859985351562, 1.8846206665039062, 45.802093505859375, 23.016006469726562, 23.72930145263672, -9.007911682128906, 22.88880157470703], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000658.npy"}
|
|
{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 34.305973052978516, "std": 29.324827194213867, "min": -21.34691619873047, "p10": -1.4792613983154295, "median": 35.50633239746094, "p90": 66.81596527099609, "max": 112.21209716796875, "pos_frac": 0.84375, "sample": [20.77475929260254, -1.1366348266601562, 8.17416763305664, 42.63790512084961, 10.804573059082031, 37.89310073852539, 112.21209716796875, 61.311485290527344, 0.9988079071044922, 11.840476989746094, 100.03646850585938, -8.741357803344727, 44.3973388671875, 1.3974609375, 65.4127197265625, 20.25421142578125, 37.78466796875, 28.154064178466797, 39.918212890625, 17.761886596679688, 22.778013229370117, -21.34691619873047, -0.043704986572265625, 66.46029663085938, 40.018531799316406, 79.5093994140625, 62.21558380126953, 50.768211364746094, 63.100372314453125, 78.327392578125, 59.158966064453125, 27.477163314819336, -1.3005123138427734, 12.942901611328125, 51.12577819824219, 50.748748779296875, 66.48716735839844, 16.539783477783203, 11.15020751953125, 47.13395690917969, 33.41608810424805, 11.057167053222656, 58.221473693847656, -1.555868148803711, 69.26100158691406, -7.556480407714844, 4.636817932128906, 31.56134033203125, 52.72504806518555, 87.04863739013672, 64.95289611816406, 25.47867774963379, -1.5913772583007812, 42.025962829589844, 34.508785247802734, 19.36505126953125, 66.15686798095703, 36.50387954711914, 66.95687866210938, -10.986465454101562, 40.34466552734375, 38.23970031738281, -7.962211608886719, 7.636016845703125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000659.npy"}
|
|
{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 37.83965301513672, "std": 27.794391632080078, "min": -5.1110687255859375, "p10": 3.416747856140137, "median": 35.600074768066406, "p90": 66.91301651000977, "max": 127.02607727050781, "pos_frac": 0.921875, "sample": [44.50879669189453, 44.09132385253906, 21.63726806640625, 45.500450134277344, -1.623321533203125, 66.40193939208984, 89.81242370605469, -2.0670394897460938, 19.970314025878906, 58.40804672241211, 18.991613388061523, 37.72049331665039, 43.914772033691406, 53.48753356933594, 29.752761840820312, 68.63004302978516, 34.12480163574219, -5.1110687255859375, 53.64577102661133, 13.350799560546875, 29.581329345703125, 22.047760009765625, 55.394752502441406, 42.635032653808594, 63.86659240722656, 99.6961898803711, -2.966634750366211, 12.927316665649414, 69.968994140625, 61.518272399902344, 19.94024658203125, 34.59437561035156, 19.930809020996094, 46.639190673828125, 32.123504638671875, 61.32752990722656, 54.85259246826172, 54.41893005371094, 49.06664276123047, 20.763412475585938, 3.3418617248535156, 21.892597198486328, 38.245086669921875, 41.803977966308594, 32.141456604003906, 12.849334716796875, 3.591482162475586, 122.27113342285156, 127.02607727050781, 18.638038635253906, 29.539535522460938, 52.53990936279297, 22.831378936767578, 67.13204956054688, 57.64073181152344, -1.7434844970703125, 22.464401245117188, 36.60577392578125, 20.261932373046875, 51.24694061279297, 11.663711547851562, 1.4299201965332031, 4.245294570922852, 40.60395431518555], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000660.npy"}
|
|
{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 34.966941833496094, "std": 30.865558624267578, "min": -28.07427978515625, "p10": 3.4970308303833018, "median": 27.793622970581055, "p90": 73.81653900146485, "max": 148.329345703125, "pos_frac": 0.921875, "sample": [29.504146575927734, 0.24277305603027344, 66.75566101074219, 24.04169273376465, 36.73855972290039, 27.586688995361328, 36.4174919128418, -9.479705810546875, 61.32965087890625, 73.19606018066406, 37.063316345214844, 22.353195190429688, 14.965965270996094, 59.9984130859375, 148.329345703125, 44.673118591308594, 31.260086059570312, 19.683128356933594, -0.8605194091796875, 22.749048233032227, 58.214420318603516, 4.374835968017578, 16.52547836303711, 5.866939544677734, 3.120828628540039, 68.76284790039062, 15.081886291503906, 75.92509460449219, 77.76904296875, 9.486007690429688, -13.008193969726562, 28.572731018066406, 18.42108154296875, 79.54273986816406, 47.242652893066406, 7.70536994934082, 13.344703674316406, 15.532371520996094, 22.634130477905273, 22.96038818359375, 6.051794052124023, 27.304901123046875, 22.991058349609375, 80.96385955810547, 45.18391418457031, -12.206161499023438, 66.99563598632812, 26.672279357910156, 17.771709442138672, 55.55959701538086, 74.08245849609375, -28.07427978515625, 17.707656860351562, 7.468400955200195, 58.38568878173828, 59.839805603027344, 26.022024154663086, 66.13803100585938, 37.03266143798828, 114.63467407226562, 31.425640106201172, 43.86045837402344, 39.448394775390625, 28.00055694580078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000661.npy"}
|
|
{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 30.80561065673828, "std": 25.711318969726562, "min": -15.023521423339844, "p10": 6.280214881896973, "median": 23.169673919677734, "p90": 65.84209289550782, "max": 109.64275360107422, "pos_frac": 0.953125, "sample": [11.733966827392578, 41.98394012451172, 57.05699157714844, -11.345226287841797, 63.846778869628906, 32.952659606933594, 69.33949279785156, -15.023521423339844, 45.80635452270508, 23.141845703125, 31.94928741455078, 19.62152862548828, 8.790756225585938, 25.548553466796875, 26.389617919921875, 19.998794555664062, 5.751752853393555, 14.501888275146484, 88.54264068603516, 33.55714416503906, 66.69722747802734, 22.809585571289062, 32.04267120361328, 32.82033157348633, 12.1256103515625, 22.98406982421875, 30.717403411865234, 51.46385192871094, 13.742630004882812, 18.573211669921875, 13.904350280761719, 8.818456649780273, 11.813430786132812, 7.295492172241211, 55.355865478515625, 40.84754943847656, 14.792831420898438, 26.54998016357422, 44.81366729736328, 23.19750213623047, 17.587356567382812, 43.11572265625, 78.69090270996094, 80.7860107421875, 41.43450927734375, 22.210609436035156, 16.402671813964844, 19.888072967529297, 30.725006103515625, 35.20684814453125, 19.249135971069336, 0.17183303833007812, 18.785903930664062, 14.490955352783203, 6.668663024902344, 6.113737106323242, 107.50247192382812, 109.64275360107422, 41.510528564453125, 54.62067413330078, 49.763633728027344, 0.24335479736328125, 14.62811279296875, -3.3913116455078125], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000662.npy"}
|
|
{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 33.04689407348633, "std": 24.61198616027832, "min": -21.303390502929688, "p10": 6.674959945678711, "median": 30.33756732940674, "p90": 61.595861053466805, "max": 123.40316772460938, "pos_frac": 0.984375, "sample": [50.751808166503906, 37.72486877441406, 56.12176513671875, 15.02204704284668, 46.327735900878906, 32.561187744140625, 6.684276580810547, 31.46942138671875, 9.541961669921875, 7.010688781738281, 30.43839454650879, 6.670967102050781, 50.518409729003906, 50.765380859375, 25.055587768554688, 35.999725341796875, 30.236740112304688, 48.65229034423828, 10.433982849121094, 17.6851806640625, 36.27063751220703, 15.893508911132812, 13.591808319091797, 20.723121643066406, 63.91242218017578, 54.48828125, 54.53416442871094, 26.850296020507812, 64.28158569335938, 41.23345947265625, 14.044536590576172, 79.56878662109375, 89.17645263671875, -21.303390502929688, 4.787872314453125, 44.62434387207031, 41.88343048095703, 21.31058120727539, 17.5130558013916, 62.493446350097656, 59.501495361328125, 70.55802917480469, 56.03948211669922, 15.156196594238281, 24.098419189453125, 36.97746276855469, 15.987239837646484, 28.287704467773438, 17.701955795288086, 123.40316772460938, 56.91699981689453, 55.937255859375, 46.29176330566406, 4.419921875, 22.024444580078125, 10.660324096679688, 4.709201812744141, 5.30177116394043, 41.00181579589844, 7.616462707519531, 5.386695861816406, 36.65898895263672, 9.94842529296875, 14.865310668945312], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000663.npy"}
|
|
{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 37.14731979370117, "std": 27.36865997314453, "min": -51.943485260009766, "p10": 4.58526954650879, "median": 37.534637451171875, "p90": 69.7172103881836, "max": 95.9025650024414, "pos_frac": 0.9375, "sample": [71.2774658203125, 33.67870330810547, 11.702556610107422, 79.50355529785156, 54.7274169921875, 61.48046875, 41.25830078125, 81.57588958740234, 41.837127685546875, 60.506256103515625, -0.6601715087890625, 59.13848876953125, 54.697784423828125, 16.090913772583008, 91.85424041748047, 33.02830505371094, 34.602447509765625, 64.04249572753906, 19.750244140625, 66.07661437988281, 56.44389343261719, 46.688018798828125, 13.638107299804688, 60.778045654296875, 4.234149932861328, 12.210334777832031, 51.61668014526367, 13.354377746582031, 6.1572418212890625, 25.56634521484375, 5.404548645019531, 54.01951217651367, 80.87254333496094, 26.722007751464844, 23.392425537109375, 53.96672821044922, -51.943485260009766, 12.231010437011719, 48.663368225097656, 53.972412109375, -9.992225646972656, 27.317169189453125, 30.33941650390625, 48.18505859375, 39.17327880859375, 55.332000732421875, 29.932830810546875, 28.4940128326416, 91.09439086914062, 18.14574432373047, 3.5317916870117188, 31.686859130859375, 18.054885864257812, 12.775291442871094, 50.212249755859375, 52.46087646484375, 45.51249694824219, -5.958271026611328, 38.304534912109375, 23.769298553466797, 38.2525634765625, 36.81671142578125, 95.9025650024414, 3.9277172088623047], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000664.npy"}
|
|
{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 31.53219223022461, "std": 23.78449058532715, "min": -7.907407760620117, "p10": 3.0599838256835956, "median": 31.692195892333984, "p90": 58.89931678771973, "max": 120.5572509765625, "pos_frac": 0.9375, "sample": [2.2639694213867188, 58.674198150634766, 17.424564361572266, 21.457252502441406, 37.27435302734375, 37.71638488769531, 32.37676239013672, 14.8111572265625, 34.40840148925781, 50.36427307128906, 13.236419677734375, 23.388587951660156, 7.691802978515625, 70.0084228515625, 14.482887268066406, 19.45741844177246, 1.5891876220703125, 56.01002502441406, 5.872947692871094, 32.89390563964844, 38.99017333984375, 40.586585998535156, 40.857566833496094, 55.01665496826172, -7.907407760620117, 33.97332000732422, 15.531036376953125, 36.07660675048828, 64.49698638916016, 8.892597198486328, 4.917350769042969, 22.356477737426758, 47.84483337402344, 58.54732894897461, 44.685821533203125, 14.083234786987305, 36.036277770996094, 43.44319152832031, 29.93743133544922, 11.964393615722656, 1.8732185363769531, 44.49517059326172, 85.96533966064453, -0.7871246337890625, 38.10545349121094, 60.05635070800781, -0.8560810089111328, 22.41606903076172, 58.99579620361328, 39.616493225097656, 6.573295593261719, 16.657764434814453, 47.51548385620117, 9.133087158203125, 62.77714538574219, 26.410789489746094, 10.464035034179688, 15.995269775390625, 51.98797607421875, 54.600257873535156, 120.5572509765625, -7.1396636962890625, 31.00762939453125, 29.905925750732422], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000665.npy"}
|
|
{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 35.28595733642578, "std": 26.626768112182617, "min": -26.87394905090332, "p10": 2.087860488891603, "median": 35.888084411621094, "p90": 75.0630470275879, "max": 90.63648986816406, "pos_frac": 0.90625, "sample": [44.839324951171875, 22.905975341796875, 46.05503845214844, 21.683162689208984, -5.096784591674805, 63.54938507080078, 46.364173889160156, 42.03068542480469, 24.461563110351562, 80.39794921875, -0.9208145141601562, 9.692718505859375, 1.48321533203125, 8.254257202148438, 27.144195556640625, 37.531394958496094, 85.04193115234375, 23.635231018066406, 14.540218353271484, 52.28591537475586, 74.37554931640625, 35.48951721191406, 19.548349380493164, 36.286651611328125, 90.63648986816406, -4.76654052734375, 40.070411682128906, 3.498699188232422, 19.538047790527344, -23.28448486328125, 47.607666015625, 56.46485900878906, 25.51759910583496, 75.3576889038086, 49.95048522949219, 38.0478515625, 57.12732696533203, 11.97137451171875, 29.991294860839844, 82.01020812988281, 13.707927703857422, 87.15619659423828, -26.87394905090332, 77.5186538696289, 13.101211547851562, 46.70968246459961, -4.145145416259766, 61.917877197265625, 24.503170013427734, 23.944137573242188, 53.59516906738281, 40.81711196899414, 19.71270751953125, 22.089466094970703, 18.168052673339844, 27.794189453125, 20.632938385009766, 52.69661331176758, 58.66841125488281, 39.72528839111328, 12.77154541015625, 58.828887939453125, 60.816184997558594, 43.12702178955078], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000666.npy"}
|
|
{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 34.412696838378906, "std": 24.72762107849121, "min": -16.377269744873047, "p10": 5.710640716552735, "median": 33.06393814086914, "p90": 61.63188552856446, "max": 106.17068481445312, "pos_frac": 0.96875, "sample": [17.188339233398438, 14.521232604980469, 19.55269432067871, -16.377269744873047, 13.372665405273438, 47.291221618652344, 24.532135009765625, 43.20178985595703, 40.656158447265625, 9.9954833984375, 4.810380935668945, 3.4813995361328125, 20.945274353027344, 43.51170349121094, 26.501928329467773, 59.08440399169922, 51.40925598144531, 8.833106994628906, 84.54893493652344, 37.91765594482422, 22.29840087890625, 51.2312126159668, 51.56695556640625, 55.44764709472656, 50.01423645019531, 13.228828430175781, 29.56171989440918, 25.089759826660156, 52.514007568359375, 50.023780822753906, 22.98796844482422, 40.83863830566406, 106.17068481445312, 74.68832397460938, 47.606903076171875, -7.440765380859375, 4.1528167724609375, 14.761993408203125, 39.83141326904297, 45.56793212890625, 9.059036254882812, 58.25286865234375, 25.03594970703125, 5.5398101806640625, 79.13618469238281, 62.723663330078125, 73.61663818359375, 55.99565887451172, 45.38987731933594, 27.47265625, 5.4571990966796875, 47.25572204589844, 14.358749389648438, 90.22900390625, 34.11333084106445, 51.826454162597656, 32.48908233642578, 11.378395080566406, 33.6387939453125, 10.8719482421875, 7.246795654296875, 6.109245300292969, 51.78273010253906, 18.311866760253906], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000667.npy"}
|
|
{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 33.14695358276367, "std": 26.5964298248291, "min": -19.394302368164062, "p10": 1.422550678253174, "median": 30.69057846069336, "p90": 70.86981430053713, "max": 98.8163070678711, "pos_frac": 0.921875, "sample": [58.12181091308594, 48.01048278808594, 83.78840637207031, -12.39471435546875, 6.124629974365234, 9.69082260131836, 64.430419921875, 15.164756774902344, 52.742828369140625, 30.05893325805664, 29.406585693359375, -6.8665924072265625, 23.46784210205078, 51.09329605102539, 79.41107177734375, 32.49830627441406, -10.509414672851562, 0.9705047607421875, 10.932548522949219, 21.260114669799805, 32.474464416503906, 41.12358856201172, 27.073341369628906, 26.902114868164062, 55.661781311035156, 30.617225646972656, 15.088455200195312, 3.585296630859375, 3.554779052734375, 39.325904846191406, 44.41907501220703, 59.78953552246094, 30.763931274414062, 30.846885681152344, 42.81797790527344, 45.63384246826172, -5.585456848144531, 86.05292510986328, 17.588546752929688, 1.9691524505615234, 49.07427978515625, 63.10607147216797, 73.62955474853516, 24.554407119750977, -19.394302368164062, 74.351318359375, 8.604400634765625, 15.967498779296875, 25.524051666259766, 7.026409149169922, 37.36659240722656, 30.473392486572266, 31.77813720703125, 50.702903747558594, 1.67767333984375, 25.201438903808594, 37.854286193847656, 58.98387908935547, 98.8163070678711, 54.12190246582031, 1.3132123947143555, 58.936981201171875, 17.17822265625, 77.45028686523438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000668.npy"}
|
|
{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 38.299198150634766, "std": 32.85043716430664, "min": -26.481550216674805, "p10": 0.7169198989868165, "median": 37.22895050048828, "p90": 73.90769729614259, "max": 132.2518310546875, "pos_frac": 0.90625, "sample": [42.2030029296875, 7.402458190917969, -14.723047256469727, 70.99903106689453, -26.481550216674805, 71.4336166381836, 22.8521728515625, 66.16065979003906, 15.065038681030273, 0.6639537811279297, 132.2518310546875, 9.34450912475586, 8.270843505859375, 39.73277282714844, -6.84735107421875, 98.23212432861328, 35.74810028076172, 14.707002639770508, 19.98846435546875, 50.85274124145508, 60.92657470703125, 33.32585906982422, 74.968017578125, 25.036590576171875, 64.34370422363281, 57.401512145996094, 45.11188507080078, 93.75920104980469, 56.426368713378906, 4.785541534423828, 63.33922576904297, 109.78250122070312, 49.35224914550781, 68.81027221679688, 87.23267364501953, -5.038972854614258, 5.104183197021484, 4.825065612792969, 95.21896362304688, 51.10259246826172, 20.044471740722656, 7.81768798828125, 28.973731994628906, 41.358821868896484, -20.0206298828125, 61.75653839111328, 11.425888061523438, 70.83598327636719, 38.709800720214844, 19.529346466064453, 28.102561950683594, 60.974735260009766, 12.624282836914062, 20.845348358154297, 28.029327392578125, 39.78507995605469, -7.8606109619140625, 64.63154602050781, 54.06210708618164, 61.64822006225586, 30.422595977783203, 29.11359405517578, 0.8405075073242188, 43.82734298706055], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000669.npy"}
|
|
{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 39.020320892333984, "std": 26.109134674072266, "min": -7.844768524169922, "p10": 7.713055419921876, "median": 39.65853500366211, "p90": 74.46239013671875, "max": 96.96675109863281, "pos_frac": 0.953125, "sample": [31.647947311401367, 54.2125244140625, 60.318748474121094, 68.0026626586914, 27.082401275634766, 13.291109085083008, -3.1198348999023438, 10.50820541381836, 74.55767822265625, 51.36383056640625, 49.38691711425781, 12.206253051757812, -0.8127422332763672, 46.54058837890625, 63.36256408691406, 45.87446594238281, 24.295166015625, 9.842247009277344, 54.7659912109375, 14.029491424560547, 9.722415924072266, 10.311866760253906, 81.58500671386719, 22.35076904296875, 73.31355285644531, 43.095787048339844, 23.08767318725586, 20.431949615478516, 66.64122772216797, 15.46063232421875, 77.40589141845703, 59.08690643310547, 13.742427825927734, 44.31990051269531, 47.581295013427734, 85.42122650146484, 64.65162658691406, 7.043216705322266, 68.62112426757812, 91.15653991699219, -7.844768524169922, 76.53861999511719, 22.851806640625, 21.97027587890625, 74.24005126953125, 61.89789962768555, 20.309677124023438, 52.76093673706055, 1.3038787841796875, 34.00033187866211, 8.849113464355469, 96.96675109863281, 50.60406494140625, 44.06353759765625, 18.017480850219727, 7.226173400878906, 33.28820037841797, 36.384979248046875, 47.16654968261719, 51.840293884277344, 6.084751129150391, 42.932090759277344, 30.68090057373047, 32.77954864501953], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000670.npy"}
|
|
{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 35.541099548339844, "std": 29.027013778686523, "min": -11.566856384277344, "p10": 5.84485378265381, "median": 28.887191772460938, "p90": 78.92247314453128, "max": 118.17950439453125, "pos_frac": 0.953125, "sample": [2.5947418212890625, 29.1798095703125, 15.152950286865234, 53.14019775390625, 7.203340530395508, 97.56037902832031, 42.544532775878906, 7.496894836425781, 12.929237365722656, 56.094451904296875, 35.0928955078125, 16.643516540527344, 56.46295928955078, 34.35710906982422, 9.14491081237793, 18.628890991210938, 25.45736312866211, 21.740028381347656, 58.45988464355469, 66.47625732421875, 6.893087387084961, 54.44269561767578, 2.189727783203125, 71.96492004394531, 91.60783386230469, 41.56550598144531, 81.90428161621094, 46.489295959472656, 60.22559356689453, 28.594573974609375, 5.395610809326172, 9.722419738769531, 8.507865905761719, 12.959129333496094, 57.27284240722656, 12.259147644042969, 70.05288696289062, 32.98192596435547, 51.99878692626953, 19.462112426757812, 60.186187744140625, -11.566856384277344, 26.102584838867188, 10.604904174804688, 57.212432861328125, 36.832672119140625, 26.867935180664062, 96.15293884277344, 3.6228256225585938, 21.928688049316406, 54.38056182861328, 29.46385955810547, -2.096323013305664, 22.14666748046875, 22.689910888671875, 11.67352294921875, -7.849952697753906, 7.125724792480469, 14.324043273925781, 89.6888656616211, 29.83782196044922, 118.17950439453125, 81.96585083007812, 42.3052978515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000671.npy"}
|
|
{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 32.39436721801758, "std": 30.078792572021484, "min": -13.848587036132812, "p10": -1.1314830780029297, "median": 27.38913917541504, "p90": 72.87146682739261, "max": 121.78811645507812, "pos_frac": 0.875, "sample": [23.90238380432129, 17.37903594970703, 10.213188171386719, 22.27912139892578, 3.3547935485839844, -1.0420417785644531, 41.29234313964844, 46.9290771484375, 94.24393463134766, 2.3310012817382812, 6.105068206787109, 21.34772491455078, -13.848587036132812, 33.30778503417969, 43.724449157714844, -3.6283493041992188, 26.873138427734375, -10.849937438964844, 34.125999450683594, 104.27831268310547, 27.905139923095703, 66.24553680419922, 1.7717666625976562, -1.1698150634765625, 48.00993347167969, 13.552162170410156, 75.71115112304688, 24.78310203552246, 32.349308013916016, 53.570091247558594, -2.8882217407226562, 95.62295532226562, 121.78811645507812, 37.883392333984375, 64.94479370117188, 24.875526428222656, 6.557857513427734, 30.760498046875, 37.944129943847656, 58.31439971923828, 30.157508850097656, 30.740066528320312, 10.774419784545898, 58.6895751953125, 23.60256576538086, 37.81224060058594, 59.120811462402344, 45.370025634765625, 14.412635803222656, 30.321231842041016, 4.650764465332031, 19.858291625976562, 4.6340789794921875, 23.26087188720703, 4.94780158996582, -9.048372268676758, 13.109443664550781, 44.645172119140625, 66.17115020751953, -3.899810791015625, 80.98126983642578, 63.03752136230469, 88.34211730957031, 10.698020935058594], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000672.npy"}
|
|
{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 37.07730484008789, "std": 27.53151512145996, "min": -8.660566329956055, "p10": 2.995305252075197, "median": 34.42546081542969, "p90": 71.62473678588869, "max": 109.17060852050781, "pos_frac": 0.96875, "sample": [4.654762268066406, 2.202281951904297, 5.280109405517578, 41.130523681640625, 23.517494201660156, 109.17060852050781, 52.05754852294922, 20.269351959228516, 101.21280670166016, 52.94470977783203, 0.3701438903808594, 67.83567810058594, 24.43987274169922, 57.709197998046875, 4.391147613525391, 26.42333984375, -8.660566329956055, 69.35405731201172, 64.1878662109375, 26.480932235717773, 36.692893981933594, 51.76045227050781, 38.409454345703125, 2.3970870971679688, 64.3634033203125, 10.207286834716797, 17.773727416992188, 66.474609375, -1.1971397399902344, 16.217666625976562, 51.017452239990234, 64.32124328613281, 41.161277770996094, 43.310909271240234, 15.786905288696289, 93.35209655761719, 28.360275268554688, 31.112472534179688, 77.97929382324219, 34.57501220703125, 14.28335952758789, 12.485803604125977, 44.348304748535156, 67.45768737792969, 46.45231246948242, 2.0270767211914062, 19.185110092163086, 10.525968551635742, 34.275909423828125, 84.75740814208984, 39.08837890625, 23.641958236694336, 86.52991485595703, 62.51133728027344, 0.9727783203125, 37.3488883972168, 30.066253662109375, 45.4407958984375, 43.37552261352539, 10.538642883300781, 12.990684509277344, 21.077049255371094, 72.59788513183594, 21.92015266418457], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000673.npy"}
|
|
{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 37.874568939208984, "std": 28.332286834716797, "min": -0.8873023986816406, "p10": 3.8411582946777356, "median": 32.6145133972168, "p90": 74.58417053222657, "max": 115.23285675048828, "pos_frac": 0.984375, "sample": [36.64567184448242, 30.081851959228516, 50.066650390625, 21.113235473632812, 4.9066619873046875, 61.971900939941406, 23.350568771362305, 35.10845184326172, 2.5879383087158203, 73.46387481689453, 43.55833435058594, 82.14224243164062, 0.6626167297363281, 86.11378479003906, 17.451095581054688, 13.39442253112793, 10.370168685913086, 115.23285675048828, 17.370567321777344, 97.74995422363281, 25.341306686401367, 24.030879974365234, 1.992950439453125, 63.61957550048828, 46.086097717285156, 27.952587127685547, 58.54124450683594, 10.217727661132812, 49.17497253417969, 2.68975830078125, 70.29908752441406, 109.48989868164062, 5.309688568115234, 3.3845138549804688, 85.71268463134766, 74.91896057128906, 15.464715957641602, 17.02648162841797, 49.65889358520508, 71.6293716430664, 33.07441711425781, 23.05455780029297, 73.80299377441406, 17.570499420166016, 9.944047927856445, 26.278335571289062, 55.14764404296875, 45.429603576660156, 16.240646362304688, 43.522701263427734, 28.092979431152344, -0.8873023986816406, 68.51835632324219, 9.839115142822266, 49.2564697265625, 41.87071228027344, 50.82588195800781, 45.091331481933594, 26.715904235839844, 37.69990539550781, 42.8994140625, 0.1819629669189453, 32.15460968017578, 11.76336669921875], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000674.npy"}
|
|
{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 39.495147705078125, "std": 27.726579666137695, "min": -28.165916442871094, "p10": 7.229411315917969, "median": 39.685787200927734, "p90": 78.8889892578125, "max": 101.28887939453125, "pos_frac": 0.921875, "sample": [18.383596420288086, 27.800670623779297, 36.75922393798828, 31.04807472229004, 36.311614990234375, 8.546283721923828, 82.66844177246094, -0.6773185729980469, 16.911518096923828, 50.73755645751953, 56.388832092285156, -20.955970764160156, 72.39464569091797, 19.17430877685547, 34.35882568359375, 4.300392150878906, 7.133491516113281, 79.27655029296875, 62.753196716308594, -28.165916442871094, 79.73149108886719, -3.3801803588867188, 101.28887939453125, 70.34870147705078, 36.97843933105469, 9.318456649780273, 12.361812591552734, 48.74936294555664, 41.83800506591797, 7.453224182128906, 48.04094696044922, 73.24330139160156, 61.003570556640625, 31.458251953125, 23.94532012939453, 70.19493103027344, 34.80600357055664, -11.302230834960938, 59.66808319091797, 77.98468017578125, 44.586700439453125, 38.46394348144531, 15.173080444335938, 53.651336669921875, 45.83015441894531, 14.156997680664062, 37.576263427734375, 79.85234832763672, 42.64425277709961, 40.907630920410156, 14.077394485473633, 52.681732177734375, 55.022193908691406, 31.596874237060547, 20.148536682128906, 26.07919692993164, 50.01662826538086, 11.665946960449219, 50.509979248046875, 85.34535217285156, 60.98206329345703, 57.24739074707031, 81.96304321289062, 48.631431579589844], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000675.npy"}
|
|
{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 36.297088623046875, "std": 28.963899612426758, "min": -24.548484802246094, "p10": 5.836563873291018, "median": 28.943811416625977, "p90": 72.48710861206057, "max": 99.427490234375, "pos_frac": 0.90625, "sample": [68.1841049194336, 90.55322265625, 41.275672912597656, 13.568092346191406, 64.42205810546875, 16.203109741210938, 46.839080810546875, 61.503395080566406, -1.8371696472167969, 10.881542205810547, -24.548484802246094, 36.03584289550781, 27.496421813964844, 14.74090576171875, 91.58201599121094, 64.0430908203125, 44.744815826416016, 48.10799789428711, 10.85788345336914, 17.85525131225586, 9.59918212890625, -1.1776809692382812, 42.68290710449219, 15.198883056640625, 20.447242736816406, 11.984306335449219, 23.82819366455078, 4.5814056396484375, 78.06678771972656, 61.12449645996094, 28.610671997070312, 59.85002136230469, 54.586204528808594, 47.914154052734375, 12.555267333984375, 16.248239517211914, 64.44070434570312, 56.799468994140625, 14.490447998046875, 99.427490234375, 34.6246337890625, 19.525588989257812, 51.6727294921875, 25.814022064208984, 40.42967224121094, -19.636932373046875, -6.543548583984375, 26.697479248046875, 8.765266418457031, 17.749725341796875, 92.49057006835938, 56.54522705078125, 66.67352294921875, 21.65473175048828, 13.531982421875, 53.40559387207031, 98.9020004272461, 16.985389709472656, 62.874332427978516, -1.3937149047851562, 29.27695083618164, 74.33125305175781, 15.65435791015625, 59.215423583984375], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000676.npy"}
|
|
{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 37.449485778808594, "std": 28.72178840637207, "min": -29.576866149902344, "p10": 1.5555427551269536, "median": 35.74952507019043, "p90": 73.69906234741212, "max": 108.10565185546875, "pos_frac": 0.921875, "sample": [-29.576866149902344, 108.10565185546875, 19.752647399902344, 17.8697509765625, 7.899894714355469, 58.590721130371094, -13.640913009643555, 57.20941925048828, 78.3707046508789, 9.550209045410156, 23.211477279663086, 52.76338195800781, 3.4155197143554688, 78.58045959472656, 1.356353759765625, 61.544166564941406, 67.33624267578125, 14.845596313476562, 13.387451171875, 29.596160888671875, 37.1556282043457, 47.71091842651367, 34.343421936035156, 29.846641540527344, 46.530052185058594, 21.75103759765625, 75.27977752685547, 31.876691818237305, 57.35185241699219, 64.500732421875, 16.57769203186035, 28.6207275390625, -10.638158798217773, 27.62042236328125, 69.0929946899414, 26.852264404296875, 80.36044311523438, 27.19563865661621, 38.54054260253906, 58.88105773925781, 82.64759063720703, 3.3980941772460938, 70.00003051757812, 47.54400634765625, 1.3342437744140625, 51.30829620361328, 24.314926147460938, 15.727666854858398, 88.2459716796875, 13.081283569335938, 54.23307800292969, -0.9860115051269531, -14.636672973632812, 37.66719055175781, 70.01072692871094, 54.1812858581543, 24.45587921142578, 65.19031524658203, 55.11150360107422, 67.5611572265625, 37.54730987548828, 2.0203170776367188, 28.97699737548828, 48.213600158691406], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000677.npy"}
|
|
{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 36.70520782470703, "std": 31.234189987182617, "min": -9.92763900756836, "p10": -0.10983066558837695, "median": 31.58275604248047, "p90": 82.16436004638672, "max": 100.96841430664062, "pos_frac": 0.890625, "sample": [19.869173049926758, 100.96841430664062, 36.274688720703125, 58.66162109375, 56.10548400878906, 1.8298492431640625, 49.183631896972656, -1.0653076171875, 4.100074768066406, 21.05703353881836, 5.9805908203125, 49.56717300415039, 31.203842163085938, 80.62728881835938, 3.08282470703125, 45.02687072753906, 84.11221313476562, -6.017494201660156, 31.877471923828125, 57.18824005126953, 80.44661712646484, 21.439102172851562, -3.296293258666992, 26.1778564453125, 26.37810516357422, 95.59579467773438, 17.029739379882812, 10.59332275390625, -4.770336151123047, 82.82310485839844, 50.73457336425781, -0.9411220550537109, -4.954925537109375, 62.56263732910156, 9.960685729980469, 90.4603271484375, 31.288040161132812, 73.11354064941406, 5.0195159912109375, 57.781524658203125, 2.193756103515625, 41.19623565673828, 44.83659362792969, 66.1368637084961, 54.38887023925781, -9.92763900756836, 66.13827514648438, 77.44805908203125, 13.505775451660156, 71.36802673339844, 37.290618896484375, 3.380340576171875, 24.330364227294922, 3.5945777893066406, 4.549154281616211, 10.254045486450195, 7.059602737426758, 85.99565887451172, 41.678489685058594, 50.15412139892578, 68.03995513916016, 26.787601470947266, 92.75668334960938, 8.901893615722656], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000678.npy"}
|
|
{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 36.85191345214844, "std": 26.95528793334961, "min": -26.8460693359375, "p10": 9.05175895690918, "median": 33.74213218688965, "p90": 70.539266204834, "max": 109.41598510742188, "pos_frac": 0.953125, "sample": [77.56924438476562, 19.79096794128418, 27.154911041259766, 67.88973236083984, 49.878028869628906, 66.95743560791016, 30.562889099121094, 78.13243103027344, 62.03001403808594, 35.231689453125, 34.676361083984375, 69.43595123291016, 51.798683166503906, 12.153875350952148, 102.63409423828125, 19.076515197753906, 38.236358642578125, 24.599517822265625, 47.352745056152344, 31.006149291992188, 67.24642944335938, 104.83587646484375, 20.07647705078125, 14.074577331542969, 17.302989959716797, -26.8460693359375, 21.37706184387207, 33.54814910888672, 39.45634078979492, 16.9168643951416, 31.969444274902344, 15.10699462890625, -8.632316589355469, 64.21703338623047, 29.15985107421875, 19.03173065185547, 60.32373046875, 47.467559814453125, 6.908702850341797, 21.10700225830078, -6.05755615234375, 2.510528564453125, 31.175277709960938, 37.387550354003906, 73.10626983642578, 34.35181427001953, 13.779115676879883, 15.3800048828125, 13.63486099243164, 16.147247314453125, 56.78174591064453, 9.986923217773438, 8.65097427368164, 33.93611526489258, 36.376853942871094, 35.89271926879883, 71.01211547851562, 52.26691818237305, 48.870506286621094, 109.41598510742188, 43.056610107421875, 47.26969909667969, 26.445758819580078, 6.328245162963867], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000679.npy"}
|
|
{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 36.1793098449707, "std": 29.486133575439453, "min": -17.139549255371094, "p10": 3.3941310882568376, "median": 33.03257179260254, "p90": 65.80172119140624, "max": 125.35784912109375, "pos_frac": 0.921875, "sample": [53.2635498046875, -12.624980926513672, 2.7226295471191406, 44.09904479980469, 11.733108520507812, 31.419227600097656, 107.04313659667969, 49.45440673828125, 51.703834533691406, 18.78691864013672, 12.9140625, 35.49046325683594, -2.820392608642578, 49.36517333984375, 39.31114959716797, 105.65505981445312, 23.315387725830078, 2.107635498046875, -6.64617919921875, 7.436408996582031, 73.5899658203125, 38.65821075439453, 8.072528839111328, 19.613643646240234, 23.224769592285156, 65.80770874023438, 32.92787551879883, 22.42653465270996, 29.819190979003906, 42.364013671875, 10.228080749511719, 62.06920623779297, 39.111907958984375, 51.0636100769043, 45.138729095458984, 39.12739562988281, 12.153617858886719, 47.744476318359375, 46.95960998535156, 10.42523193359375, 61.28736877441406, 107.57485961914062, 50.67768859863281, 28.580650329589844, 31.041561126708984, -17.139549255371094, 65.78775024414062, 16.4805908203125, 16.970417022705078, 92.73706817626953, -2.7035980224609375, 125.35784912109375, 6.5805816650390625, 17.30945587158203, 63.68061828613281, 33.13726806640625, 56.659217834472656, 52.289215087890625, 20.34878158569336, 23.07733154296875, 4.960968017578125, 28.86908721923828, 46.34967041015625, 41.305084228515625], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000680.npy"}
|
|
{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 31.793434143066406, "std": 28.625511169433594, "min": -20.932456970214844, "p10": 1.5619449615478525, "median": 26.756820678710938, "p90": 63.89557571411134, "max": 120.13215637207031, "pos_frac": 0.90625, "sample": [10.313575744628906, -6.749244689941406, 65.24208068847656, 20.456085205078125, 60.56085205078125, 26.990196228027344, 55.158233642578125, 20.531536102294922, 23.631507873535156, 22.798564910888672, 7.05133056640625, 33.61895751953125, -12.778783798217773, 12.070281982421875, -0.13623046875, 120.13215637207031, 36.47681427001953, 10.929222106933594, 26.840309143066406, 48.58708190917969, 60.75373077392578, 2.9954757690429688, 12.139198303222656, 34.281951904296875, 15.641677856445312, 58.13347625732422, 18.840599060058594, 91.67431640625, 26.67333221435547, 32.817665100097656, 1.1368484497070312, 32.51275634765625, 34.546539306640625, 7.905494689941406, 21.645278930664062, 21.72612762451172, -0.6629962921142578, 6.2172393798828125, 33.49455261230469, -7.454242706298828, 18.37664794921875, 28.99622344970703, 37.992835998535156, 46.923301696777344, -20.932456970214844, 27.238853454589844, 5.33476448059082, 10.783348083496094, 16.063552856445312, 16.743179321289062, 72.52767944335938, 58.78449630737305, 76.39740753173828, 44.677268981933594, 21.862258911132812, 34.84241485595703, 60.34978485107422, 110.8502426147461, 54.25399398803711, 56.116783142089844, 2.5538368225097656, 60.192901611328125, 91.8756332397461, 15.231307983398438], "npy": "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312/margin_logs/step_0000681.npy"}
|