Model: jackf857/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun Source: Original Platform
682 lines
1.1 MiB
682 lines
1.1 MiB
{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000001.npy"}
|
|
{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000002.npy"}
|
|
{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": -0.06313562393188477, "std": 0.34747758507728577, "min": -0.6941757202148438, "p10": -0.5081428527832031, "median": -0.08103275299072266, "p90": 0.31004981994628916, "max": 1.1425857543945312, "pos_frac": 0.40625, "sample": [-0.2111034393310547, -0.35184478759765625, -0.00292205810546875, 1.1425857543945312, -0.2964191436767578, -0.24936866760253906, 0.2808341979980469, 0.013631820678710938, -0.1563262939453125, 0.009889602661132812, -0.5525054931640625, 0.2082538604736328, 0.2187347412109375, -0.4325714111328125, -0.08674240112304688, -0.21845245361328125, 0.515869140625, -0.059604644775390625, 0.214202880859375, 0.08998489379882812, -0.6874618530273438, -0.04742431640625, 0.7362594604492188, -0.1116180419921875, 0.32257080078125, 0.05938720703125, 0.1155242919921875, 0.4285144805908203, 0.251312255859375, -0.34081268310546875, 0.33766937255859375, 0.25571441650390625, 0.0018215179443359375, 0.210784912109375, 0.06997108459472656, -0.26738739013671875, -0.499176025390625, -0.13058853149414062, -0.5119857788085938, -0.09004974365234375, -0.22855758666992188, -0.00716400146484375, -0.4908599853515625, -0.19464111328125, 0.272796630859375, -0.07532310485839844, -0.6941757202148438, 0.12556838989257812, -0.33042144775390625, -0.24903106689453125, 0.1640605926513672, -0.3004341125488281, 0.18291091918945312, -0.2583465576171875, -0.105560302734375, -0.3510551452636719, -0.010498046875, -0.5613937377929688, -0.344085693359375, 0.5799713134765625, 0.012342453002929688, -0.57342529296875, -0.16149139404296875, -0.6210174560546875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000003.npy"}
|
|
{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": -0.06749901175498962, "std": 0.389242947101593, "min": -1.0184783935546875, "p10": -0.5528709411621093, "median": -0.05158710479736328, "p90": 0.39782180786132837, "max": 1.235321044921875, "pos_frac": 0.421875, "sample": [-0.14474868774414062, -0.38411712646484375, 0.15651893615722656, -1.0184783935546875, -0.2044830322265625, -0.04473114013671875, 0.1797332763671875, -0.797088623046875, -0.2528953552246094, -0.467803955078125, 0.026369094848632812, 0.3310699462890625, -0.17273712158203125, 0.10198974609375, -0.4002227783203125, -0.000804901123046875, -0.506500244140625, -0.048748016357421875, 0.0383758544921875, 0.07013130187988281, -0.32472991943359375, 0.09234619140625, -0.2170257568359375, -0.570343017578125, 0.42642974853515625, -0.17364501953125, 0.1369476318359375, -0.0856475830078125, -0.05442619323730469, -0.6043701171875, 0.04840850830078125, -0.04227447509765625, -0.522430419921875, 0.08160781860351562, -0.1416797637939453, 0.26229095458984375, -0.2704277038574219, 0.2656097412109375, -0.5834083557128906, 0.1190643310546875, -0.5558853149414062, -0.12653541564941406, 0.46692657470703125, 0.4379425048828125, 0.060150146484375, 0.08610916137695312, -0.3527374267578125, 1.235321044921875, 0.04479408264160156, -0.30108642578125, 0.06628990173339844, 0.7051849365234375, 0.5562801361083984, -0.44518280029296875, -0.060718536376953125, 0.11093902587890625, 0.9233856201171875, -0.15624237060546875, 0.291717529296875, -0.02622222900390625, -0.57635498046875, -0.3031463623046875, -0.54583740234375, -0.15815353393554688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000004.npy"}
|
|
{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": 0.06674566864967346, "std": 0.349670946598053, "min": -0.7608489990234375, "p10": -0.350267219543457, "median": 0.04918956756591797, "p90": 0.4829771041870118, "max": 1.1513137817382812, "pos_frac": 0.59375, "sample": [-0.11187744140625, 0.7811355590820312, 0.47107696533203125, -0.0390777587890625, 1.1513137817382812, 0.0822906494140625, 0.09284210205078125, -0.006374359130859375, -0.2821083068847656, -0.03423309326171875, -0.47320556640625, -0.006561279296875, 0.18306732177734375, 0.025360107421875, 0.13634490966796875, 0.5115127563476562, 0.01988983154296875, -0.35727500915527344, 0.6121292114257812, -0.2833709716796875, -0.23558616638183594, 0.09521484375, 0.021488189697265625, 0.35558319091796875, 0.11654472351074219, 0.5027999877929688, 0.16607666015625, 0.3324394226074219, -0.6174545288085938, -0.06047821044921875, -0.15184974670410156, 0.48807716369628906, 0.2925567626953125, 0.09175872802734375, 0.20801734924316406, 0.2806396484375, 0.36871337890625, 0.0078277587890625, 0.8762969970703125, 0.35174560546875, 0.010467529296875, 0.16529083251953125, 0.46042633056640625, -0.7608489990234375, 0.40309906005859375, 0.1057281494140625, -0.054821014404296875, -0.33391571044921875, -0.3140869140625, -0.48598480224609375, -0.139129638671875, 0.06762504577636719, -0.2426910400390625, 0.21824264526367188, 0.3365974426269531, 0.12926483154296875, 0.1837158203125, -0.30889892578125, -0.439239501953125, -0.12380218505859375, -0.39882659912109375, 0.03075408935546875, -0.1897125244140625, -0.010822296142578125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000005.npy"}
|
|
{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": -0.007933229207992554, "std": 0.2757773697376251, "min": -0.5821876525878906, "p10": -0.3977687835693359, "median": 0.0039005279541015625, "p90": 0.33930091857910183, "max": 0.55615234375, "pos_frac": 0.53125, "sample": [0.0520477294921875, -0.14188194274902344, 0.001392364501953125, 0.051055908203125, 0.2003326416015625, 0.429962158203125, -0.5093822479248047, -0.5525894165039062, -0.1134033203125, 0.0932159423828125, -0.028362274169921875, 0.47717857360839844, 0.031219482421875, 0.4356060028076172, 0.10933303833007812, -0.2247161865234375, 0.36539459228515625, -0.15568161010742188, 0.19493865966796875, -0.06400299072265625, -0.13583755493164062, -0.35311317443847656, 0.205596923828125, -0.01413726806640625, 0.03262138366699219, 0.5077667236328125, 0.2784156799316406, 0.11286163330078125, -0.068634033203125, -0.0648345947265625, -0.08138275146484375, 0.18844032287597656, 0.00640869140625, -0.2929115295410156, -0.12471771240234375, 0.26202392578125, -0.5371932983398438, -0.5821876525878906, 0.12730979919433594, 0.260772705078125, -0.227569580078125, 0.170867919921875, -0.3896484375, 0.1068878173828125, 0.2541942596435547, -0.0091552734375, 0.09200668334960938, 0.3922557830810547, 0.08795166015625, -0.21533203125, -0.448333740234375, 0.20343017578125, 0.25286865234375, -0.14622116088867188, 0.084197998046875, -0.08739662170410156, 0.18746185302734375, -0.2161865234375, -0.24835205078125, -0.5181884765625, -0.3678436279296875, -0.4012489318847656, 0.55615234375, 0.00054931640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000006.npy"}
|
|
{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": -0.009620696306228638, "std": 0.4239095449447632, "min": -0.9776611328125, "p10": -0.5338748931884766, "median": 0.03446006774902344, "p90": 0.48388557434082036, "max": 0.8633270263671875, "pos_frac": 0.546875, "sample": [0.17736053466796875, -0.7477645874023438, 0.044391632080078125, 0.1210174560546875, -0.31429290771484375, 0.3320655822753906, -0.7502708435058594, 0.7432861328125, -0.6319580078125, 0.80889892578125, -0.9776611328125, -0.22840118408203125, 0.0628814697265625, 0.24691009521484375, 0.0384521484375, -0.0228729248046875, 0.3982219696044922, -0.4977283477783203, -0.4034881591796875, -0.5027580261230469, 0.052234649658203125, 0.6219482421875, 0.563079833984375, 0.048492431640625, 0.48956298828125, -0.34328460693359375, -0.13016319274902344, -0.05152702331542969, 0.17704391479492188, -0.03218841552734375, -0.45555877685546875, 0.3396148681640625, 0.6836967468261719, 0.1394805908203125, 0.259307861328125, 0.00330352783203125, 0.02471923828125, 0.3847160339355469, -0.42913818359375, -0.42981719970703125, -0.324798583984375, -0.547210693359375, 0.28012847900390625, -0.31500244140625, 0.030467987060546875, 0.2298583984375, 0.23114395141601562, -0.4739570617675781, 0.4036407470703125, -0.8698196411132812, 0.8633270263671875, -0.3787384033203125, -0.3968505859375, 0.18599700927734375, 0.37186431884765625, 0.36785888671875, -0.23677444458007812, 0.4706382751464844, 0.3207721710205078, -0.0214385986328125, 0.3332366943359375, -0.0979766845703125, -0.2581634521484375, -0.5957412719726562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000007.npy"}
|
|
{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": 0.0070285797119140625, "std": 0.3879009485244751, "min": -1.241607666015625, "p10": -0.4700736999511718, "median": 0.021982192993164062, "p90": 0.4249008178710938, "max": 0.858642578125, "pos_frac": 0.5625, "sample": [0.7090911865234375, 0.2900543212890625, 0.049560546875, -0.030008316040039062, 0.32701873779296875, -0.1157379150390625, 0.06921768188476562, 0.01177978515625, 0.2791900634765625, -0.22180938720703125, -0.028804779052734375, -0.14058303833007812, 0.25689697265625, 0.20553970336914062, -0.31566619873046875, -0.24069976806640625, 0.3210105895996094, -0.18642044067382812, 0.5541572570800781, -0.13208770751953125, 0.01082611083984375, -0.958221435546875, 0.114898681640625, 0.4974365234375, -0.15178680419921875, -0.18325042724609375, -0.0082244873046875, 0.16655349731445312, 0.375701904296875, -0.48281097412109375, 0.4123077392578125, -0.9147262573242188, 0.3071784973144531, 0.5836029052734375, -0.04412841796875, -0.94891357421875, 0.00984954833984375, -0.08282279968261719, 0.04172515869140625, 0.20897293090820312, 0.858642578125, -0.022369384765625, 0.027008056640625, -0.542755126953125, 0.041812896728515625, -1.241607666015625, -0.030487060546875, -0.170257568359375, 0.1841907501220703, 0.2720947265625, -0.20344161987304688, -0.5554885864257812, 0.4302978515625, 0.17158889770507812, 0.1656646728515625, 0.016956329345703125, -0.4403533935546875, 0.08812713623046875, 0.037841796875, -0.13335418701171875, 0.18181800842285156, 0.4023017883300781, -0.1995697021484375, 0.49530029296875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000008.npy"}
|
|
{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.1140756905078888, "std": 0.46454307436943054, "min": -0.804046630859375, "p10": -0.370965576171875, "median": 0.03476142883300781, "p90": 0.7363250732421877, "max": 1.32843017578125, "pos_frac": 0.515625, "sample": [0.3753509521484375, -0.33257293701171875, 0.5820407867431641, 0.119842529296875, 0.06395721435546875, -0.490447998046875, 0.6206436157226562, 0.20389938354492188, 0.55804443359375, 0.037967681884765625, -0.2545623779296875, -0.804046630859375, -0.3716888427734375, -0.3692779541015625, -0.2647857666015625, 0.532073974609375, 0.64471435546875, -0.4187126159667969, -0.2774810791015625, -0.037689208984375, -0.16196441650390625, -0.01123809814453125, -0.3592376708984375, -0.0963287353515625, 0.354278564453125, 0.12390899658203125, 0.21104812622070312, -0.250244140625, -0.025480270385742188, 1.1740341186523438, -0.1988372802734375, -0.18994712829589844, 0.8661880493164062, 0.5107879638671875, -0.1708984375, -0.5716781616210938, -0.09401321411132812, 0.046009063720703125, 0.03155517578125, 0.69024658203125, -0.282867431640625, -0.33422088623046875, 0.47784423828125, 0.13002395629882812, 0.17171096801757812, 0.3519134521484375, 0.6215667724609375, 0.5051727294921875, 0.47308349609375, -0.03455543518066406, -0.2898712158203125, -0.2987518310546875, 0.756072998046875, 0.8574638366699219, 0.3094654083251953, 1.32843017578125, 0.0580596923828125, 1.0983123779296875, -0.126373291015625, 0.874664306640625, -0.3972625732421875, -0.32513427734375, -0.109283447265625, -0.5100784301757812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000009.npy"}
|
|
{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": 0.03607863187789917, "std": 0.3264070451259613, "min": -0.6317672729492188, "p10": -0.36651268005371085, "median": 0.0486602783203125, "p90": 0.4939880371093751, "max": 1.1133575439453125, "pos_frac": 0.546875, "sample": [-0.1221160888671875, -0.103363037109375, 0.05763435363769531, 0.10190010070800781, -0.5670814514160156, 0.005279541015625, -0.06838226318359375, -0.297210693359375, 0.17635345458984375, -0.0907745361328125, 0.08756256103515625, 0.5199623107910156, 0.050506591796875, 0.4128265380859375, -0.5799484252929688, -0.066741943359375, 0.07735061645507812, 0.10106658935546875, 0.1421833038330078, -0.04848480224609375, 0.3367500305175781, 0.08816909790039062, 0.28821754455566406, -0.18795013427734375, -0.10152435302734375, -0.19728469848632812, -0.5745468139648438, 0.16953277587890625, 0.10599136352539062, 0.26204872131347656, 0.5089874267578125, -0.28253173828125, 0.2445545196533203, 0.23053741455078125, -0.6317672729492188, -0.4742240905761719, -0.258331298828125, -0.054302215576171875, 0.19128799438476562, -0.08779335021972656, -0.2133922576904297, -0.22138214111328125, -0.1375255584716797, 0.45941162109375, 0.04681396484375, 0.5903167724609375, 0.5139656066894531, 0.102996826171875, -0.3962135314941406, 0.02048492431640625, -0.2295379638671875, 0.05658149719238281, 0.464935302734375, 0.16681289672851562, -0.02472686767578125, -0.05709266662597656, 0.212188720703125, -0.4785308837890625, 0.506439208984375, 1.1133575439453125, 0.6592178344726562, -0.10857200622558594, 0.0674591064453125, -0.16931915283203125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000010.npy"}
|
|
{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": 0.02540189027786255, "std": 0.3896263539791107, "min": -1.051513671875, "p10": -0.4258392333984375, "median": 0.010257720947265625, "p90": 0.5327821731567384, "max": 1.190399169921875, "pos_frac": 0.515625, "sample": [-0.16028594970703125, 0.6006927490234375, 0.5420284271240234, 0.08447647094726562, 0.01610565185546875, 0.09304428100585938, 0.31293487548828125, 0.036029815673828125, -0.13894271850585938, 0.778564453125, -0.08516120910644531, -0.34485626220703125, 0.13970947265625, -0.0146484375, 0.20648193359375, 0.11261749267578125, -0.023468017578125, 0.1624603271484375, 1.190399169921875, 0.7274265289306641, -0.048614501953125, 0.244476318359375, -0.05089569091796875, 0.3507347106933594, -0.2838592529296875, 0.5112075805664062, 0.03247833251953125, -0.431396484375, -0.1319122314453125, -0.19886398315429688, 0.0314178466796875, -0.2279205322265625, 0.46768951416015625, -0.270904541015625, -0.06703948974609375, -0.3128509521484375, 0.0044097900390625, 0.6679763793945312, -0.5718231201171875, -0.1944580078125, -0.03045654296875, -0.013927459716796875, -0.2907066345214844, -0.3852996826171875, 0.19607925415039062, 0.035991668701171875, 0.6121673583984375, 0.09796905517578125, -1.051513671875, 0.19443511962890625, -0.412872314453125, -0.5674934387207031, 0.2360382080078125, -0.5730438232421875, 0.22893905639648438, 0.214813232421875, -0.4066886901855469, -0.48954010009765625, -0.08608245849609375, 0.49090576171875, 0.45685577392578125, 0.13177490234375, -0.030977249145507812, -0.6871070861816406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000011.npy"}
|
|
{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": 0.038763463497161865, "std": 0.3871566653251648, "min": -1.082733154296875, "p10": -0.49184265136718747, "median": 0.06688594818115234, "p90": 0.4622657775878906, "max": 0.868621826171875, "pos_frac": 0.53125, "sample": [0.3426971435546875, -0.5421600341796875, -0.238983154296875, -0.3304443359375, -0.43133544921875, -0.0022125244140625, 0.13597869873046875, 0.46254730224609375, 0.0958099365234375, 0.28281402587890625, -0.36023712158203125, -0.81378173828125, -0.5133590698242188, 0.2882232666015625, -0.1628265380859375, 0.2467193603515625, 0.42076873779296875, 0.46160888671875, 0.185302734375, -0.4686431884765625, -1.082733154296875, 0.2710418701171875, 0.868621826171875, -0.5212326049804688, -0.20163345336914062, 0.13095855712890625, 0.0798492431640625, 0.616180419921875, 0.26282501220703125, 0.347137451171875, -0.785003662109375, 0.2889251708984375, 0.04511260986328125, 0.09858131408691406, -0.08130645751953125, -0.15756607055664062, -0.1197509765625, -0.5017852783203125, -0.44564056396484375, -0.04436492919921875, -0.15373992919921875, -0.00994873046875, -0.023014068603515625, 0.2410430908203125, -0.1881256103515625, -0.05210113525390625, 0.7363433837890625, 0.30689239501953125, 0.1947765350341797, 0.4863128662109375, -0.01725006103515625, 0.1674976348876953, 0.458740234375, 0.3160667419433594, 0.8013763427734375, -0.16119384765625, 0.3553619384765625, -0.0755767822265625, -0.0393829345703125, 0.05392265319824219, 0.23984909057617188, -0.12723541259765625, 0.336395263671875, 0.5071487426757812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000012.npy"}
|
|
{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": -0.04489260911941528, "std": 0.34645259380340576, "min": -0.8745269775390625, "p10": -0.4205257415771484, "median": -0.083587646484375, "p90": 0.30791702270507815, "max": 1.2528305053710938, "pos_frac": 0.359375, "sample": [-0.0283050537109375, -0.3475933074951172, 0.14141845703125, -0.5403671264648438, -0.1782073974609375, 0.4656333923339844, 0.48615264892578125, 0.11504364013671875, 0.2039031982421875, -0.3609809875488281, -0.20037078857421875, 0.8620185852050781, 0.14019775390625, -0.2773628234863281, -0.0249481201171875, 1.2528305053710938, -0.0611572265625, -0.446044921875, -0.2810554504394531, -0.02471160888671875, -0.014860153198242188, -0.16234588623046875, 0.30919647216796875, -0.06746673583984375, -0.11175918579101562, -0.244293212890625, 0.13805007934570312, 0.011819839477539062, -0.086669921875, 0.029022216796875, -0.24561309814453125, -0.17125701904296875, 0.045429229736328125, 0.3209648132324219, -0.8745269775390625, -0.2266693115234375, -0.089996337890625, -0.12096214294433594, -0.08636474609375, -0.452728271484375, -0.3188056945800781, 0.188201904296875, -0.291748046875, -0.4505157470703125, -0.179779052734375, -0.03695487976074219, -0.6655445098876953, 0.12307167053222656, -0.0194549560546875, -0.20345306396484375, -0.6082382202148438, 0.304931640625, 0.20953369140625, 0.06976127624511719, -0.3155670166015625, -0.2022247314453125, 0.21299362182617188, 0.01032257080078125, 0.80999755859375, -0.149261474609375, 0.298370361328125, -0.2220306396484375, -0.080810546875, -0.1509857177734375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000013.npy"}
|
|
{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": -0.0062340497970581055, "std": 0.4301234781742096, "min": -1.08428955078125, "p10": -0.5658349990844725, "median": 0.0053501129150390625, "p90": 0.4797309875488283, "max": 0.9222335815429688, "pos_frac": 0.53125, "sample": [-0.25128936767578125, 0.12346267700195312, -0.2150592803955078, 0.09836578369140625, 0.384490966796875, -0.033679962158203125, -1.08428955078125, -0.35245513916015625, 0.9222335815429688, 0.3588676452636719, 0.03823089599609375, -0.08443832397460938, 0.715545654296875, 0.7801589965820312, -0.7360305786132812, -0.20912933349609375, -0.6156826019287109, -0.04146575927734375, 0.28839111328125, -0.25908851623535156, 0.6250343322753906, 0.00463104248046875, 0.0736541748046875, 0.2937889099121094, 0.2056427001953125, -0.03847503662109375, -1.0692138671875, -0.07929611206054688, -0.41571044921875, 0.560272216796875, 0.06829452514648438, 0.006069183349609375, 0.33237457275390625, -0.06531906127929688, 0.2926750183105469, 0.30612945556640625, 0.3209953308105469, -0.3382568359375, -0.2633171081542969, 0.21448898315429688, -0.44952392578125, 0.0014801025390625, -0.0305328369140625, 0.7918014526367188, -0.06707763671875, -0.05564117431640625, 0.27084922790527344, -0.4284629821777344, -0.1842041015625, -0.33758544921875, -0.8385467529296875, 0.26384735107421875, 0.43685150146484375, -0.76739501953125, -0.2530670166015625, 0.49810791015625, 0.26911163330078125, 0.1641387939453125, -0.9309005737304688, 0.2531166076660156, -0.1974163055419922, 0.24588966369628906, 0.02667236328125, 0.0579071044921875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000014.npy"}
|
|
{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": 0.013581663370132446, "std": 0.2954900860786438, "min": -0.7161178588867188, "p10": -0.39126720428466794, "median": 0.0406341552734375, "p90": 0.3851737976074219, "max": 0.5654525756835938, "pos_frac": 0.53125, "sample": [0.09271430969238281, -0.104034423828125, 0.08731269836425781, -0.053318023681640625, 0.41262054443359375, 0.09054183959960938, -0.40955543518066406, 0.5270462036132812, -0.133941650390625, 0.41426849365234375, -0.0216064453125, -0.23010826110839844, 0.1370391845703125, -0.2290802001953125, 0.3106117248535156, 0.2035675048828125, 0.00218963623046875, -0.46303558349609375, -0.2572174072265625, 0.5273590087890625, 0.3851470947265625, -0.7161178588867188, -0.15958404541015625, -0.05576324462890625, -0.5198688507080078, -0.292236328125, 0.08815765380859375, 0.22945404052734375, -0.05260467529296875, 0.04320526123046875, -0.5935440063476562, -0.450042724609375, 0.06603240966796875, -0.24037742614746094, 0.10930633544921875, 0.3651885986328125, 0.31835174560546875, 0.4593505859375, 0.3481597900390625, -0.34859466552734375, -0.10596275329589844, -0.05820465087890625, -0.01532745361328125, 0.07717514038085938, 0.2233428955078125, -0.11041831970214844, 0.24778175354003906, -0.2402496337890625, 0.23223876953125, 0.1591339111328125, -0.14955902099609375, 0.38518524169921875, 0.03806304931640625, -0.054233551025390625, 0.12149810791015625, 0.356689453125, 0.248077392578125, -0.08349609375, 0.055084228515625, 0.10669708251953125, -0.0544586181640625, -0.6233062744140625, -0.3389701843261719, 0.5654525756835938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000015.npy"}
|
|
{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.04175823926925659, "std": 0.32614874839782715, "min": -0.7645416259765625, "p10": -0.36246814727783205, "median": 0.0381317138671875, "p90": 0.4312057495117188, "max": 0.8462944030761719, "pos_frac": 0.546875, "sample": [0.11920356750488281, 0.622589111328125, -0.6841583251953125, -0.1102294921875, -0.3626289367675781, -0.0233001708984375, 0.1101531982421875, 0.060665130615234375, 0.1793365478515625, 0.21437835693359375, 0.3393707275390625, 0.16129302978515625, 0.7314605712890625, -0.021026611328125, 0.01584625244140625, 0.04140472412109375, 0.13616180419921875, -0.7645416259765625, -0.5940704345703125, 0.14101409912109375, -0.0360107421875, 0.1817035675048828, 0.5581779479980469, 0.20546722412109375, -0.06918907165527344, -0.12045478820800781, -0.5337295532226562, -0.073944091796875, 0.07521438598632812, -0.15297698974609375, 0.23159408569335938, 0.5148468017578125, -0.116943359375, -0.3704032897949219, 0.08104705810546875, 0.1713409423828125, 0.6883773803710938, -0.0363616943359375, 0.32808685302734375, -0.09021568298339844, 0.19650650024414062, 0.0499725341796875, 0.19298934936523438, 0.358673095703125, 0.41112518310546875, 0.2272796630859375, -0.03220558166503906, -0.3620929718017578, -0.12718963623046875, -0.017303466796875, -0.1504669189453125, 0.43981170654296875, 0.03204345703125, -0.33953094482421875, -0.09632110595703125, 0.03485870361328125, -0.35315895080566406, -0.5228271484375, 0.8462944030761719, 0.267791748046875, 0.3571929931640625, -0.054718017578125, -0.1759490966796875, -0.25879669189453125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000016.npy"}
|
|
{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.10520240664482117, "std": 0.4037126898765564, "min": -0.9837188720703125, "p10": -0.3674848556518554, "median": 0.10762405395507812, "p90": 0.6332389831542969, "max": 1.1279296875, "pos_frac": 0.640625, "sample": [-0.5102882385253906, 0.16095352172851562, 0.04886627197265625, 0.1842803955078125, -0.5588302612304688, -0.22542953491210938, 0.6359100341796875, -0.9837188720703125, 0.13885498046875, -0.41968536376953125, 0.7926864624023438, -0.04931640625, 0.0826416015625, 1.1279296875, -0.19942665100097656, 0.056640625, -0.15485763549804688, 0.5089855194091797, -0.13908004760742188, 0.46514892578125, -0.0430908203125, 0.08670616149902344, -0.24197006225585938, 0.044952392578125, 0.230804443359375, 0.41077423095703125, 0.2600421905517578, 0.14492416381835938, 0.7235870361328125, 0.1070556640625, -0.28863525390625, 0.6270065307617188, 0.43520355224609375, 0.20468902587890625, 0.1242828369140625, 0.6976165771484375, -0.08549880981445312, -0.24074363708496094, 0.1260986328125, 0.17279052734375, 0.376708984375, 0.15387344360351562, -0.4012775421142578, 0.35704803466796875, -0.8286590576171875, 0.264984130859375, -0.07416915893554688, -0.5535926818847656, 0.09130859375, 0.10819244384765625, -0.08983230590820312, -0.1922607421875, -0.2345733642578125, 0.01811981201171875, 0.13533782958984375, -0.19439697265625, 1.126007080078125, 0.7172813415527344, 0.592193603515625, 0.007293701171875, 0.27040672302246094, 0.2218170166015625, -0.0654144287109375, 0.4676971435546875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000017.npy"}
|
|
{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.0032672882080078125, "std": 0.336790531873703, "min": -1.1476287841796875, "p10": -0.3492591857910156, "median": -0.011651992797851562, "p90": 0.47185974121093766, "max": 0.7335624694824219, "pos_frac": 0.484375, "sample": [0.31549072265625, -0.3361968994140625, 0.18902587890625, 0.6454086303710938, -0.0722198486328125, -0.22273635864257812, -0.192291259765625, -0.5637359619140625, -0.086944580078125, 0.17222976684570312, -0.180389404296875, -0.09949111938476562, 0.23624420166015625, 0.22636032104492188, -0.134307861328125, 0.6605453491210938, 0.11324119567871094, -0.1517963409423828, -0.0068206787109375, -0.1085968017578125, -0.08893013000488281, -0.0609283447265625, -0.4255218505859375, 0.11019325256347656, 0.7335624694824219, 0.28893280029296875, -0.20862579345703125, 0.087127685546875, -0.4354572296142578, 0.04957008361816406, 0.3049736022949219, 0.20862388610839844, -0.07781219482421875, -0.28890228271484375, 0.4863433837890625, -0.26702880859375, -0.31710052490234375, 0.298370361328125, -0.34064483642578125, 0.2591552734375, 0.5086097717285156, -0.2500762939453125, 0.01940155029296875, -0.134674072265625, 0.498687744140625, 0.20822906494140625, 0.4380645751953125, -0.016483306884765625, -0.572998046875, 0.16717147827148438, 0.17911911010742188, 0.2495899200439453, -0.2005615234375, -0.3529510498046875, 0.05829620361328125, -0.0500946044921875, 0.6023406982421875, 0.00783538818359375, -1.1476287841796875, 0.04317283630371094, -0.11649131774902344, -0.051959991455078125, -0.6138992309570312, 0.017486572265625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000018.npy"}
|
|
{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.06906837224960327, "std": 0.3174370527267456, "min": -0.7047882080078125, "p10": -0.3064712524414062, "median": 0.03057861328125, "p90": 0.5074699401855469, "max": 0.8870048522949219, "pos_frac": 0.59375, "sample": [0.4953651428222656, 0.024198532104492188, -0.33562469482421875, -0.28359222412109375, 0.04026031494140625, 0.430938720703125, 0.1940765380859375, -0.14596939086914062, -0.21384239196777344, 0.5214157104492188, -0.1619110107421875, -0.17218780517578125, 0.5086898803710938, 0.38653564453125, 0.76837158203125, -0.7047882080078125, -0.5993194580078125, 0.21758270263671875, -0.4267120361328125, 0.2848968505859375, 0.513427734375, -0.00595855712890625, -0.05625152587890625, 0.22027587890625, 0.03565025329589844, 0.0213775634765625, 0.32720947265625, 0.13303375244140625, -0.20949172973632812, 0.2604827880859375, 0.0404052734375, 0.2942352294921875, 0.03034210205078125, 0.014448165893554688, 0.5806198120117188, -0.0509796142578125, -0.4437408447265625, -0.044559478759765625, -0.028564453125, -0.266204833984375, -0.13864898681640625, 0.131256103515625, -0.31627655029296875, -0.180938720703125, -0.06256294250488281, 0.8870048522949219, 0.3140869140625, 0.296539306640625, 0.5046234130859375, 0.2694244384765625, 0.1553192138671875, -0.19671249389648438, -0.1815509796142578, 0.026964187622070312, 0.26972198486328125, 0.07116889953613281, 0.03081512451171875, 0.11331558227539062, -0.3378753662109375, -0.1109619140625, 0.629608154296875, 0.22687911987304688, -0.20101165771484375, 0.0260467529296875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000019.npy"}
|
|
{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.059678733348846436, "std": 0.31928321719169617, "min": -0.822540283203125, "p10": -0.28210926055908203, "median": 0.02462005615234375, "p90": 0.44265403747558607, "max": 0.803863525390625, "pos_frac": 0.609375, "sample": [0.05857086181640625, 0.25255584716796875, -0.233734130859375, 0.0091705322265625, 0.0252838134765625, -0.2218799591064453, -0.0704803466796875, -0.26868438720703125, 0.40370941162109375, 0.2578125, -0.822540283203125, 0.24114227294921875, 0.7230491638183594, -0.21662330627441406, -0.09522628784179688, 0.000606536865234375, 0.009632110595703125, 0.1412811279296875, -0.5024929046630859, -0.05084228515625, 0.5138397216796875, -0.4679107666015625, -0.1179962158203125, 0.0074481964111328125, 0.02252960205078125, -0.2018585205078125, 0.05865478515625, -0.029664993286132812, 0.22045516967773438, -0.28786277770996094, 0.023242950439453125, -0.05169677734375, -0.6268844604492188, 0.06951522827148438, -0.42046546936035156, 0.0885467529296875, 0.39581298828125, 0.288604736328125, -0.14780807495117188, 0.34236907958984375, 0.15039825439453125, 0.0900421142578125, 0.24323272705078125, 0.3540191650390625, -0.244842529296875, 0.1677417755126953, 0.3997344970703125, 0.803863525390625, 0.023956298828125, -0.20609664916992188, -0.37917327880859375, -0.06153106689453125, 0.291656494140625, 0.7116851806640625, 0.45412445068359375, -0.1496143341064453, 0.54345703125, 0.4158897399902344, 0.030698776245117188, -0.0201263427734375, 0.07132339477539062, 0.38897705078125, 0.5358467102050781, -0.1150054931640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000020.npy"}
|
|
{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.08421804010868073, "std": 0.37602153420448303, "min": -0.8352432250976562, "p10": -0.26056327819824215, "median": 0.000370025634765625, "p90": 0.5967895507812501, "max": 1.2080154418945312, "pos_frac": 0.515625, "sample": [0.2054290771484375, 0.14649009704589844, 1.2080154418945312, 0.00067138671875, 0.889495849609375, 0.01947498321533203, -0.11268806457519531, -0.24197006225585938, -0.02886962890625, 0.16131591796875, -0.001708984375, -0.108245849609375, 0.05860137939453125, -0.050373077392578125, 0.36444091796875, 0.8173675537109375, 0.3188591003417969, -0.1500701904296875, -0.1296672821044922, 0.3096199035644531, -0.129364013671875, -0.043182373046875, 0.103424072265625, -0.038784027099609375, -0.22916603088378906, 0.6065216064453125, -0.112396240234375, -0.17431068420410156, 6.866455078125e-05, 0.0164947509765625, -0.673187255859375, 0.3006439208984375, -0.21190261840820312, 0.2317829132080078, 0.373077392578125, 0.150726318359375, -0.18176651000976562, -0.04903984069824219, 0.8789825439453125, -0.34227752685546875, -0.26853179931640625, -0.00334930419921875, 0.3551483154296875, 0.0013885498046875, -0.3963356018066406, -0.0865631103515625, 0.4427604675292969, 0.2568511962890625, 0.306243896484375, -0.8352432250976562, 0.0679168701171875, 0.4095458984375, 0.6520500183105469, 0.5740814208984375, -0.19828414916992188, -0.3551902770996094, -0.3564605712890625, -0.1860504150390625, 0.95733642578125, 0.056674957275390625, -0.1104888916015625, -0.18979644775390625, 0.3567962646484375, -0.21307945251464844], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000021.npy"}
|
|
{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.2030039131641388, "std": 0.3519876003265381, "min": -0.5020751953125, "p10": -0.20379943847656248, "median": 0.15009307861328125, "p90": 0.7523994445800783, "max": 0.9633941650390625, "pos_frac": 0.65625, "sample": [-0.10422515869140625, 0.41229248046875, -0.05206298828125, 0.06821823120117188, -0.0579376220703125, 0.0084075927734375, 0.3321533203125, 0.08814048767089844, 0.20827102661132812, 0.5531158447265625, 0.5801124572753906, 0.823455810546875, -0.22467422485351562, 0.8626976013183594, -0.021768569946289062, -0.25749969482421875, 0.4263916015625, 0.40512657165527344, -0.25080108642578125, -0.200103759765625, -0.11162567138671875, 0.9633941650390625, -0.20538330078125, 0.1280059814453125, -0.15529823303222656, 0.5767078399658203, 0.5923919677734375, 0.17218017578125, 0.3678131103515625, 0.9504852294921875, 0.26716041564941406, 0.24610137939453125, -0.22005462646484375, 0.0378570556640625, -0.13721847534179688, 0.201629638671875, 0.183258056640625, 0.17841339111328125, -0.12191581726074219, 0.6894073486328125, -0.0149993896484375, 0.22312164306640625, -0.042163848876953125, 0.19139480590820312, 0.0023345947265625, 0.6400222778320312, -0.1890411376953125, -0.2168731689453125, 0.9307098388671875, 0.37158966064453125, -0.089599609375, 0.5531234741210938, -0.14234352111816406, 0.046131134033203125, -0.5020751953125, -0.00469970703125, 0.24571990966796875, 0.11648178100585938, 0.346405029296875, 0.901336669921875, 0.072357177734375, 0.7793960571289062, 0.45135498046875, 0.11994743347167969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000022.npy"}
|
|
{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.19183680415153503, "std": 0.46942731738090515, "min": -0.6531219482421875, "p10": -0.3376504898071288, "median": 0.13994407653808594, "p90": 0.7038593292236329, "max": 1.8954315185546875, "pos_frac": 0.609375, "sample": [-0.1165771484375, -0.0623016357421875, 1.1684494018554688, 0.09477615356445312, 0.5066299438476562, 0.4217262268066406, 0.16170120239257812, -0.09625244140625, -0.4772529602050781, -0.56256103515625, -0.046630859375, 1.425506591796875, 0.393585205078125, 0.3467979431152344, 0.11239242553710938, -0.12082290649414062, 0.058330535888671875, 0.2992515563964844, 0.2032470703125, 0.04139137268066406, 0.358795166015625, -0.404876708984375, 1.338531494140625, 0.2312774658203125, -0.24884605407714844, -0.226348876953125, 0.42120361328125, -0.4753913879394531, -0.01853179931640625, 0.36712646484375, 0.15521621704101562, -0.2238006591796875, 0.33473968505859375, 0.3507671356201172, 0.13570594787597656, -0.0284423828125, -0.02709197998046875, 0.1711578369140625, 0.4754009246826172, 0.6673221588134766, -0.002716064453125, 0.3146648406982422, 0.34857940673828125, 0.7131462097167969, -0.11660003662109375, 1.8954315185546875, -0.19724655151367188, -0.233245849609375, 0.35723114013671875, -0.6531219482421875, 0.22730064392089844, 0.68218994140625, 0.13767242431640625, -0.37570953369140625, -0.03343963623046875, 0.27989959716796875, -0.44908905029296875, 0.649444580078125, -0.03753089904785156, 0.027618408203125, 0.8293991088867188, -0.14933013916015625, 0.14221572875976562, 0.81549072265625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000023.npy"}
|
|
{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.2882956862449646, "std": 0.4127342700958252, "min": -0.808441162109375, "p10": -0.1610462188720703, "median": 0.2952308654785156, "p90": 0.7648834228515626, "max": 1.813751220703125, "pos_frac": 0.796875, "sample": [0.9461517333984375, 0.8596458435058594, 0.07043647766113281, -0.04705810546875, 0.4234771728515625, -0.17101097106933594, 0.5434951782226562, 0.2022857666015625, 0.3708953857421875, -0.13755035400390625, 0.4904212951660156, 0.009090423583984375, 0.1531391143798828, 0.49558258056640625, 0.3400993347167969, 0.017486572265625, -0.15442657470703125, 0.18581390380859375, 0.3940277099609375, 0.1183624267578125, 0.6917991638183594, -0.808441162109375, -0.16388320922851562, 0.20250701904296875, 0.2864532470703125, 0.2872772216796875, 0.4894065856933594, -0.4602508544921875, 0.7305755615234375, 0.30318450927734375, 1.813751220703125, 0.37493896484375, 0.7795867919921875, 0.14875030517578125, 0.4690704345703125, -0.18255615234375, 0.6370697021484375, 0.077301025390625, -0.0408782958984375, 0.39887237548828125, 0.516571044921875, 0.36342811584472656, -0.6407241821289062, 0.71978759765625, 0.07927703857421875, 0.9233856201171875, 0.5173492431640625, 0.021190643310546875, 0.271240234375, 0.24291038513183594, 0.6147727966308594, -0.08739089965820312, 0.0178985595703125, 0.2000102996826172, 0.4371795654296875, 0.7901611328125, 0.35362815856933594, 0.32447052001953125, 0.30712318420410156, -0.41271209716796875, 0.6789474487304688, -0.0650634765625, 0.2003326416015625, 0.9322509765625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000024.npy"}
|
|
{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.27471229434013367, "std": 0.5095961689949036, "min": -1.2962493896484375, "p10": -0.2758445739746093, "median": 0.20055007934570312, "p90": 0.8924045562744143, "max": 1.7015151977539062, "pos_frac": 0.734375, "sample": [0.18795394897460938, 0.4065399169921875, 1.24853515625, 0.2691612243652344, -0.2882843017578125, -0.07794380187988281, -0.5693511962890625, 0.945709228515625, 1.452056884765625, 0.0012454986572265625, 0.15106201171875, 0.6783447265625, -0.03316307067871094, 0.21062469482421875, 1.7015151977539062, -0.12090492248535156, -0.87890625, 0.8332672119140625, -0.34423828125, 0.8512153625488281, -0.30353546142578125, 0.2336883544921875, 0.4640655517578125, -0.24681854248046875, 0.9100570678710938, 0.19980621337890625, 0.7187423706054688, 0.51690673828125, -0.45272064208984375, -0.0021839141845703125, 0.2922821044921875, 0.12493896484375, 0.5981216430664062, 0.0927276611328125, 0.5492019653320312, 0.13488006591796875, -0.1544818878173828, 0.9610748291015625, 0.5256805419921875, 0.7298431396484375, 0.0772552490234375, 0.5694198608398438, 0.7136383056640625, 0.2748222351074219, 0.17840576171875, -0.07010650634765625, 0.0010986328125, 0.0280609130859375, 0.816986083984375, 0.4729423522949219, 0.2012939453125, 0.2739105224609375, 0.18249130249023438, -1.2962493896484375, 0.16557693481445312, 0.10811233520507812, 0.260772705078125, 0.5439109802246094, 0.6932220458984375, -0.1669769287109375, 0.13411903381347656, 1.009307861328125, -0.01157379150390625, -0.09556961059570312], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000025.npy"}
|
|
{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.40866369009017944, "std": 0.591311514377594, "min": -0.5474052429199219, "p10": -0.20076732635498046, "median": 0.2954978942871094, "p90": 1.2916969299316408, "max": 2.45245361328125, "pos_frac": 0.734375, "sample": [0.69317626953125, 0.19689178466796875, 0.41261863708496094, 0.04737663269042969, 1.51629638671875, 0.31529998779296875, 0.1048583984375, 0.6091651916503906, 0.23659515380859375, -0.46033668518066406, 0.312835693359375, 0.6529388427734375, 2.45245361328125, 0.09844970703125, -0.15898895263671875, 0.900787353515625, -0.13748931884765625, -0.00399017333984375, 1.05828857421875, -0.0870361328125, 0.26308250427246094, 0.13847923278808594, 0.7188796997070312, 0.4821815490722656, 0.17156982421875, 1.3158187866210938, 0.2740936279296875, -0.1279449462890625, 0.3344879150390625, 0.5950775146484375, -0.5406494140625, 0.4683685302734375, 0.3227977752685547, -0.04783058166503906, -0.05879402160644531, 0.1152496337890625, 1.23541259765625, -0.05701446533203125, 0.16832733154296875, 0.9948883056640625, -0.31927490234375, 0.36124420166015625, 1.6771087646484375, 0.916717529296875, 0.27816009521484375, 1.040496826171875, -0.5474052429199219, 0.85986328125, 0.02341461181640625, 0.07828140258789062, -0.2041645050048828, 0.3462638854980469, -0.3180809020996094, 0.52996826171875, -0.192840576171875, 1.120819091796875, 1.4510955810546875, 1.3422317504882812, 1.3970184326171875, -0.2772102355957031, 0.275634765625, -0.19007110595703125, 0.5076217651367188, 0.47091102600097656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000026.npy"}
|
|
{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.50578773021698, "std": 0.5522134304046631, "min": -0.6874008178710938, "p10": 0.04342784881591797, "median": 0.39075660705566406, "p90": 1.1379104614257813, "max": 2.5250167846679688, "pos_frac": 0.90625, "sample": [0.553619384765625, 0.8731002807617188, 0.09229278564453125, 0.4936676025390625, 0.22980880737304688, 0.9870376586914062, 0.103912353515625, 0.239013671875, 1.0993728637695312, -0.6874008178710938, 0.6689624786376953, 0.39804840087890625, 0.37647247314453125, 0.3834648132324219, 0.20111083984375, 0.5538558959960938, 2.5250167846679688, 0.5316238403320312, 0.5152626037597656, 0.3229026794433594, 0.14633560180664062, 0.0862579345703125, 0.87432861328125, 0.05241966247558594, 0.071014404296875, 0.34249114990234375, 0.04625701904296875, 0.16391754150390625, 0.4144744873046875, 2.1846923828125, 0.13558197021484375, 0.04221534729003906, -0.05832672119140625, 1.69512939453125, 0.1952972412109375, 0.43291473388671875, 0.7731781005859375, 1.0574188232421875, 0.5287437438964844, 0.4212646484375, 0.3502044677734375, 0.2910003662109375, 0.453460693359375, 0.12537384033203125, 0.2090606689453125, 0.777252197265625, -0.3282623291015625, 0.6681442260742188, 1.0014419555664062, 1.1544265747070312, 1.526397705078125, 0.3522605895996094, 0.218353271484375, 0.2618255615234375, 0.8967437744140625, 0.6057357788085938, 1.5691909790039062, -0.2762641906738281, 0.3633251190185547, -0.07135772705078125, -0.0380859375, 1.2052383422851562, 0.42981719970703125, 0.558380126953125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000027.npy"}
|
|
{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.31288832426071167, "std": 0.5960327386856079, "min": -1.41656494140625, "p10": -0.34049453735351565, "median": 0.22955036163330078, "p90": 1.0363628387451171, "max": 1.7913970947265625, "pos_frac": 0.671875, "sample": [0.92803955078125, 0.6752471923828125, 0.03333282470703125, 0.17888832092285156, 0.6797542572021484, -0.33868408203125, -0.034442901611328125, 0.2605857849121094, -0.07616043090820312, 0.10455894470214844, -0.190216064453125, 0.775421142578125, 1.58441162109375, 0.21398162841796875, -0.09551239013671875, -0.1646289825439453, 0.38738250732421875, 1.7913970947265625, 0.10722732543945312, -0.14776611328125, 0.3312873840332031, -0.34127044677734375, -0.31302642822265625, 1.6445159912109375, 0.1306304931640625, 0.563690185546875, -0.20056533813476562, 1.0381011962890625, 1.2095947265625, 0.6404266357421875, 0.5509071350097656, 0.48595428466796875, -0.2930412292480469, -0.5948028564453125, 1.0323066711425781, 0.2451190948486328, 0.5893630981445312, 0.731597900390625, 0.0442962646484375, 0.15639114379882812, 0.19652557373046875, 0.3282279968261719, 0.2096691131591797, 0.536163330078125, 0.7290496826171875, 1.4095306396484375, 0.959228515625, -0.15835952758789062, 0.27152252197265625, 0.5382156372070312, 0.9734897613525391, -0.3890838623046875, 0.5489921569824219, -0.23975372314453125, -0.34313201904296875, 0.576202392578125, -0.08530044555664062, 1.28619384765625, 0.796905517578125, -0.39795494079589844, -0.3328399658203125, -1.41656494140625, -0.4390716552734375, 0.14270401000976562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000028.npy"}
|
|
{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5648301839828491, "std": 0.5578703880310059, "min": -0.40212249755859375, "p10": -0.082275390625, "median": 0.5004663467407227, "p90": 1.2791419982910157, "max": 1.9565811157226562, "pos_frac": 0.859375, "sample": [1.2002639770507812, -0.17220687866210938, 0.8397674560546875, 0.62371826171875, 0.00240325927734375, 0.31964874267578125, 0.10308074951171875, 1.1522979736328125, 0.2519645690917969, 0.6993331909179688, 0.56134033203125, 0.15182876586914062, 1.19085693359375, 0.9547500610351562, 0.1964263916015625, 1.3846588134765625, 1.1009368896484375, -0.19694900512695312, -0.09172439575195312, 1.7263259887695312, -0.03009033203125, 0.6128196716308594, 0.5242462158203125, 0.3498859405517578, 0.8938140869140625, 1.2326202392578125, -0.08248138427734375, 0.4034919738769531, 0.3989105224609375, 0.41668701171875, -0.39849090576171875, 0.7981643676757812, 1.8134078979492188, 0.049102783203125, 1.0230941772460938, 0.4766864776611328, 0.0601959228515625, 0.037036895751953125, 1.9565811157226562, 0.33233642578125, 1.097412109375, 0.092498779296875, 0.6641426086425781, 0.663482666015625, 0.38297271728515625, 0.05627250671386719, 0.16983795166015625, -0.40212249755859375, 1.0124702453613281, 0.07071304321289062, 0.3652839660644531, 0.2523078918457031, 0.7264480590820312, 0.279052734375, 0.6288261413574219, 1.4539642333984375, -0.17028045654296875, 1.2990798950195312, 0.7249946594238281, 0.7250862121582031, 1.8635406494140625, 0.7257156372070312, -0.08179473876953125, 0.6824836730957031], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000029.npy"}
|
|
{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.653449296951294, "std": 0.6144790649414062, "min": -0.628875732421875, "p10": -0.07979335784912107, "median": 0.5650615692138672, "p90": 1.4337203979492188, "max": 2.937652587890625, "pos_frac": 0.875, "sample": [0.6786727905273438, 0.13771820068359375, 0.2609405517578125, 0.70263671875, 1.498779296875, -0.1081085205078125, 0.19423294067382812, 0.1025543212890625, -0.2039947509765625, 0.9169692993164062, 0.458892822265625, 0.6606063842773438, 0.7581977844238281, 0.7911567687988281, 0.975250244140625, 1.2365608215332031, 0.7279510498046875, 1.2347946166992188, 0.11338424682617188, 1.28472900390625, 0.6985931396484375, -0.0565338134765625, 2.937652587890625, 0.5508613586425781, 1.5223846435546875, 0.26467323303222656, 0.537384033203125, 0.5609588623046875, 0.07675361633300781, 0.16721343994140625, 0.48268890380859375, 0.45954132080078125, 0.7156982421875, 0.03402900695800781, -0.17795944213867188, 0.5190811157226562, 1.679840087890625, 1.015869140625, 1.034088134765625, 0.872802734375, -0.08976173400878906, 1.1876983642578125, 0.5398693084716797, 0.17927169799804688, -0.628875732421875, 1.4082565307617188, 1.552215576171875, 0.6075820922851562, 1.1785430908203125, 1.05230712890625, 1.4446334838867188, 1.1253128051757812, 0.3032417297363281, -0.40762901306152344, -0.16948890686035156, 1.0621719360351562, 0.5691642761230469, 0.433349609375, 0.825927734375, 0.2259674072265625, 0.5379638671875, 1.9873046875, 0.22226715087890625, 0.35591888427734375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000030.npy"}
|
|
{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.509600043296814, "std": 0.6603589653968811, "min": -0.648193359375, "p10": -0.20883979797363272, "median": 0.39830780029296875, "p90": 1.3807533264160157, "max": 3.1382827758789062, "pos_frac": 0.796875, "sample": [0.2178478240966797, 1.158233642578125, -0.318023681640625, 0.5089340209960938, 1.5549697875976562, 0.6077880859375, -0.058032989501953125, -0.29711151123046875, 0.2816162109375, -0.32537841796875, 3.1382827758789062, 0.33019256591796875, 0.12003326416015625, 0.5123672485351562, 0.12714385986328125, 0.25754737854003906, 1.3176803588867188, 0.15699195861816406, 0.7789688110351562, 0.7686042785644531, 0.38262176513671875, -0.07741928100585938, 0.2902679443359375, 0.382781982421875, -0.12960433959960938, 0.4314117431640625, 0.4807929992675781, 0.4401435852050781, 0.7901153564453125, 0.3651580810546875, 2.1585235595703125, 0.5092620849609375, 1.8743133544921875, -0.2427978515625, 0.3030815124511719, -0.648193359375, -0.00478363037109375, 0.7914657592773438, 0.01397705078125, 0.8829803466796875, 0.6009521484375, 0.9905548095703125, -0.5167007446289062, 0.3719635009765625, -0.08325958251953125, -0.581146240234375, 0.300079345703125, 1.1530532836914062, 0.1393585205078125, 0.125640869140625, 0.3008079528808594, 1.3822021484375, 1.4011688232421875, 0.6243133544921875, -0.029834747314453125, 0.447418212890625, 0.5676155090332031, 0.6144866943359375, 0.2932586669921875, 0.4138336181640625, 1.0005035400390625, 1.4552993774414062, 1.3773727416992188, 0.4327049255371094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000031.npy"}
|
|
{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.7439004182815552, "std": 0.8441152572631836, "min": -0.6879386901855469, "p10": -0.18010711669921867, "median": 0.5186386108398438, "p90": 1.6642799377441406, "max": 3.90234375, "pos_frac": 0.84375, "sample": [0.36177825927734375, 0.4686279296875, 0.4662322998046875, 1.4240188598632812, 0.5830345153808594, 1.0325469970703125, 0.04319000244140625, 0.547271728515625, 0.1546192169189453, 1.2104415893554688, 1.3915367126464844, 0.4877128601074219, 0.003238677978515625, 0.04076385498046875, 1.12811279296875, 1.3668136596679688, -0.4036216735839844, 0.9736747741699219, -0.24228286743164062, 2.362823486328125, 0.011566162109375, 1.4944686889648438, 2.274078369140625, -0.3532867431640625, 1.0964126586914062, 0.4900054931640625, -0.024326324462890625, -0.0994110107421875, -0.6879386901855469, 1.1089401245117188, -0.214691162109375, 0.2907981872558594, 0.4310741424560547, 2.758056640625, 1.4549217224121094, 0.4701499938964844, 1.0430450439453125, 0.6778907775878906, -0.08064079284667969, 0.3224945068359375, 1.216644287109375, 0.6353397369384766, 0.0047454833984375, 0.6943778991699219, 3.90234375, 1.22479248046875, 0.45941925048828125, 2.0762252807617188, 2.0333099365234375, -0.2638111114501953, 0.1468334197998047, 0.24303054809570312, 1.6580581665039062, 1.0270614624023438, -0.26514434814453125, 0.08674049377441406, 1.3905448913574219, 1.3992156982421875, 0.271209716796875, 1.274627685546875, 0.112884521484375, 0.606842041015625, 1.6669464111328125, 0.14324951171875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000032.npy"}
|
|
{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.5970076322555542, "std": 0.6120725274085999, "min": -0.858245849609375, "p10": -0.0748125076293944, "median": 0.5090351104736328, "p90": 1.5213333129882813, "max": 2.277313232421875, "pos_frac": 0.890625, "sample": [-0.12845611572265625, 0.5207958221435547, 0.38959312438964844, 0.4206085205078125, 2.277313232421875, 0.2947998046875, 0.3605194091796875, 0.57843017578125, 0.7804775238037109, 0.8078670501708984, 0.32755279541015625, 0.8907012939453125, 1.88629150390625, 0.907440185546875, 1.2244720458984375, 0.5133018493652344, 0.5986747741699219, -0.858245849609375, 0.3642730712890625, -0.705230712890625, 0.14852142333984375, 1.573516845703125, 1.0248031616210938, 1.4744110107421875, 1.1745452880859375, 0.27634429931640625, 1.6596221923828125, 0.262664794921875, 0.1023712158203125, 1.54144287109375, 0.21625900268554688, -0.2814483642578125, 1.947906494140625, 0.05035591125488281, 0.31739044189453125, 0.9898147583007812, 0.4242706298828125, -0.21591949462890625, 0.5761184692382812, 0.5611343383789062, 0.17713546752929688, 1.2198562622070312, 0.23139190673828125, 0.17238998413085938, 0.5293655395507812, 0.9428329467773438, 0.47879791259765625, 1.068023681640625, 0.5876750946044922, 0.33312225341796875, 0.30158233642578125, -0.23166275024414062, 1.6833953857421875, 0.1339263916015625, 0.30078125, 0.8016433715820312, 1.1960220336914062, -0.41704559326171875, 0.6461639404296875, 0.4291534423828125, 0.25827789306640625, 0.5047683715820312, 0.6820659637451172, 0.9035186767578125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000033.npy"}
|
|
{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.784916877746582, "std": 0.8293022513389587, "min": -0.5492477416992188, "p10": -0.1621749877929687, "median": 0.6180438995361328, "p90": 1.8990203857421881, "max": 2.7160491943359375, "pos_frac": 0.8125, "sample": [2.385009765625, -0.4899024963378906, 0.898223876953125, -0.12560272216796875, 2.7160491943359375, 0.45932579040527344, 1.4888916015625, 1.5128936767578125, -0.17784881591796875, -0.045562744140625, -0.021881103515625, -0.12232017517089844, 0.49460601806640625, -0.5177993774414062, 1.470245361328125, 1.7713165283203125, 0.35755348205566406, -0.20164108276367188, 1.5563201904296875, 0.382293701171875, 1.53125, -0.5492477416992188, 1.0821762084960938, 1.5539894104003906, 1.5836181640625, 1.58770751953125, 2.1083602905273438, -0.5413398742675781, 0.474456787109375, 0.9648895263671875, 0.16300201416015625, 0.875579833984375, 1.3370780944824219, 1.0902881622314453, 1.386627197265625, 0.6262969970703125, 0.30649757385253906, 1.9537506103515625, 0.6097908020019531, -0.4948844909667969, -0.016147613525390625, 0.2556915283203125, 0.9379310607910156, 1.6837921142578125, 0.6861724853515625, 0.522491455078125, 0.34784698486328125, 0.3596534729003906, 1.4049301147460938, 0.15564727783203125, 0.4123191833496094, 0.1649322509765625, 0.9978790283203125, 0.7629547119140625, 0.4010467529296875, 0.18283653259277344, 2.2187881469726562, 0.7405433654785156, 0.24993133544921875, 0.7465858459472656, 2.5203933715820312, 2.63140869140625, 0.34409523010253906, 0.08289718627929688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000034.npy"}
|
|
{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.2465664148330688, "std": 1.223884105682373, "min": -1.2185287475585938, "p10": 0.11322650909423834, "median": 1.049269676208496, "p90": 2.4073699951171874, "max": 7.219970703125, "pos_frac": 0.9375, "sample": [1.5428009033203125, 1.1834259033203125, 2.629302978515625, 1.3792915344238281, -0.14657974243164062, 0.7664165496826172, 0.6544189453125, -0.325897216796875, 0.38535308837890625, 1.5922698974609375, 1.950103759765625, 0.17232131958007812, 1.3879623413085938, 0.4931983947753906, 0.456512451171875, 0.5931777954101562, 2.3389968872070312, 3.8137283325195312, 0.9789199829101562, 0.4493751525878906, 1.1979331970214844, 1.8988037109375, 3.3627471923828125, 0.5358963012695312, 0.00234222412109375, 0.4626922607421875, 0.41260719299316406, 1.2141647338867188, 0.355712890625, 1.4027633666992188, 7.219970703125, 1.4865570068359375, 0.6837615966796875, 1.643402099609375, 0.6675357818603516, 3.581207275390625, 0.193572998046875, 1.6552734375, 1.998809814453125, 3.0317916870117188, 0.3772125244140625, 2.4087066650390625, 0.9565811157226562, 1.2666702270507812, 0.8403434753417969, 0.08790016174316406, 2.0316925048828125, 2.4042510986328125, 1.2352066040039062, 0.7685165405273438, 0.7945270538330078, 0.03363037109375, 0.511627197265625, 1.8919830322265625, 1.5675811767578125, 1.0086193084716797, 1.0899200439453125, -0.3007774353027344, 1.4690780639648438, -1.2185287475585938, 0.6302032470703125, 2.1877593994140625, 2.11334228515625, 0.3215599060058594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000035.npy"}
|
|
{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.1986362934112549, "std": 1.2389873266220093, "min": -1.915435791015625, "p10": -0.19995822906494137, "median": 1.034867286682129, "p90": 3.056024169921875, "max": 3.65667724609375, "pos_frac": 0.84375, "sample": [1.2816696166992188, -0.20799827575683594, 1.0622730255126953, 1.3883743286132812, 0.938446044921875, 0.2884254455566406, 1.3077850341796875, -0.311676025390625, -0.2870521545410156, 2.593414306640625, 2.330810546875, 0.08717155456542969, -0.14481735229492188, 0.6850013732910156, 0.2230701446533203, 1.239166259765625, -0.21741104125976562, 1.7801971435546875, 1.9158935546875, -0.1811981201171875, 0.170806884765625, -0.3270111083984375, 0.5302047729492188, 0.12704849243164062, 2.5626220703125, 3.5343894958496094, 1.3488655090332031, -0.8565521240234375, 3.388671875, 0.863800048828125, 0.68890380859375, 1.5217056274414062, -0.11502456665039062, 3.2554550170898438, 0.14948272705078125, 0.05726814270019531, 2.770965576171875, 0.21588897705078125, 2.4322509765625, 0.6066207885742188, 2.9410247802734375, 1.2369518280029297, 3.0791778564453125, 1.21746826171875, 3.136749267578125, 0.9149284362792969, 0.7656593322753906, 3.65667724609375, 1.0074615478515625, -1.915435791015625, 2.747600555419922, 0.7627716064453125, 2.95184326171875, 1.1056632995605469, 2.4068069458007812, 1.6388282775878906, 0.2174530029296875, 0.5696907043457031, 3.0019989013671875, 0.36072731018066406, 1.2540130615234375, 3.155029296875, 1.0721855163574219, 0.7295417785644531], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000036.npy"}
|
|
{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 1.0305198431015015, "std": 1.3288214206695557, "min": -2.19659423828125, "p10": -0.08535404205322264, "median": 0.6794052124023438, "p90": 2.7334373474121096, "max": 5.687469482421875, "pos_frac": 0.8125, "sample": [2.296173095703125, -0.03664398193359375, 1.6292343139648438, 1.4595413208007812, -0.070953369140625, 0.02832794189453125, 0.25354766845703125, 0.6956863403320312, 1.1157913208007812, 0.03382110595703125, 0.6039352416992188, 2.6912994384765625, 0.3260993957519531, -0.680572509765625, -0.417938232421875, 0.32755279541015625, 0.7301788330078125, 0.025537490844726562, 1.824777603149414, 5.687469482421875, 0.98382568359375, 4.204010009765625, 1.362762451171875, 0.9379615783691406, 0.67303466796875, 1.6571693420410156, 0.7560577392578125, 0.19811630249023438, 1.4286117553710938, 1.0593280792236328, 0.5918693542480469, 0.33971405029296875, 0.32956504821777344, -1.0215377807617188, 0.79901123046875, -0.00408935546875, 0.3195228576660156, 1.878814697265625, 2.2257614135742188, 3.5047454833984375, -0.02788543701171875, 2.78985595703125, 0.56884765625, 1.341949462890625, 0.18985748291015625, 0.6857757568359375, -0.07099723815917969, -0.1009521484375, 0.6556015014648438, 1.6582183837890625, 2.7415237426757812, 0.17493820190429688, 2.714569091796875, 2.4498977661132812, 0.4224815368652344, 3.1577301025390625, 1.4460258483886719, -2.19659423828125, -0.10692787170410156, -0.0915069580078125, 2.5917015075683594, 3.8116302490234375, 0.0760040283203125, 0.3244037628173828], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000037.npy"}
|
|
{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.388970971107483, "std": 1.7071802616119385, "min": -1.5025787353515625, "p10": -0.04564514160156244, "median": 0.8321685791015625, "p90": 3.010504150390625, "max": 8.43121337890625, "pos_frac": 0.890625, "sample": [0.2856636047363281, 8.43121337890625, 4.296661376953125, 2.131685256958008, 0.2728691101074219, 0.013135910034179688, 2.0399093627929688, -0.1582489013671875, 0.9610061645507812, 2.253631591796875, 2.049407958984375, 0.7994918823242188, 0.272918701171875, 2.7742843627929688, 1.043426513671875, 0.05224418640136719, 0.188262939453125, 0.4593238830566406, 0.03965187072753906, 2.4856185913085938, 0.608734130859375, 2.043060302734375, 2.925445556640625, 0.09732437133789062, 4.0078582763671875, 0.023197174072265625, 1.909881591796875, 2.7982177734375, 1.4972648620605469, 0.10429763793945312, 0.5815696716308594, 2.9844207763671875, -1.5025787353515625, 0.7626113891601562, 0.6607227325439453, 0.3848724365234375, 0.8524169921875, 5.6640167236328125, 0.811920166015625, -0.26944732666015625, 3.0216827392578125, 5.043426513671875, 1.734771728515625, 0.10457992553710938, 1.0398235321044922, -0.14255523681640625, 0.3165435791015625, 0.3112144470214844, 0.7448272705078125, 1.2475738525390625, -0.6375656127929688, 0.6380691528320312, -0.07083702087402344, 1.1165046691894531, 1.1732101440429688, 2.2337493896484375, 1.48583984375, 5.558097839355469, 1.2439804077148438, 0.3282470703125, 2.0589675903320312, -0.5425205230712891, 0.5461196899414062, 2.7024269104003906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000038.npy"}
|
|
{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.6994619369506836, "std": 1.4307514429092407, "min": -2.3300018310546875, "p10": 0.2582473754882813, "median": 1.4077682495117188, "p90": 3.6793312072753914, "max": 5.1661376953125, "pos_frac": 0.921875, "sample": [2.3359756469726562, 1.8044586181640625, 5.1661376953125, 2.31854248046875, 1.6036834716796875, -0.3477935791015625, 2.7147598266601562, 2.91693115234375, 1.1060161590576172, 2.572969436645508, 0.7512588500976562, 3.4615631103515625, 1.8178863525390625, 1.1335811614990234, 4.217082977294922, 0.7449722290039062, 0.31026458740234375, 0.8115882873535156, 0.6970672607421875, 0.9540481567382812, 0.5172042846679688, 2.1683807373046875, 2.47802734375, 0.8231983184814453, 0.9283447265625, 2.4420700073242188, -0.30333709716796875, 1.8789024353027344, 4.8787841796875, 0.07878875732421875, 0.6840362548828125, 0.6304187774658203, 3.5285720825195312, 2.2369117736816406, -2.3300018310546875, 1.9127082824707031, 0.6643199920654297, 2.4691810607910156, 4.152996063232422, 4.7417144775390625, 0.9148902893066406, 1.7227935791015625, 3.7439422607421875, 0.8435287475585938, 1.21185302734375, 0.624725341796875, -0.1059112548828125, 0.7999038696289062, 1.8692703247070312, 0.9143009185791016, 0.6075057983398438, 3.759662628173828, 2.8680877685546875, 2.8081817626953125, 0.6201343536376953, 3.4743881225585938, 3.276580810546875, 1.0574951171875, 2.2710952758789062, 0.23595428466796875, 0.4761238098144531, -0.06368827819824219, 1.0475177764892578, 2.115011215209961], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000039.npy"}
|
|
{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.4811410903930664, "std": 1.7293864488601685, "min": -0.7967071533203125, "p10": -0.34040603637695305, "median": 1.049215316772461, "p90": 3.8233551025390633, "max": 7.809051513671875, "pos_frac": 0.8125, "sample": [-0.3970947265625, 0.2427825927734375, 0.7917861938476562, 0.7123775482177734, 1.857086181640625, 0.4632415771484375, 0.32492828369140625, 0.24150848388671875, 0.2999401092529297, 2.152750015258789, 6.4745635986328125, 3.679351806640625, 0.5047874450683594, 2.7591629028320312, 1.7525405883789062, -0.3983802795410156, 1.9927501678466797, 2.137420654296875, 0.17687416076660156, -0.7967071533203125, 2.79205322265625, -0.3639373779296875, 0.31866455078125, -0.43035888671875, 2.533172607421875, 1.535430908203125, 0.003749847412109375, 0.8160667419433594, 0.45477294921875, -0.2577056884765625, 3.9448089599609375, -0.10207939147949219, 3.389678955078125, -0.1559295654296875, 0.753997802734375, 4.162483215332031, 0.3433074951171875, 3.88507080078125, 0.1935882568359375, 1.238595962524414, 4.869941711425781, 0.9212417602539062, 1.1393051147460938, 7.809051513671875, -0.3975563049316406, 2.2314300537109375, -0.36884307861328125, 1.358612060546875, 2.708282470703125, 1.6268463134765625, 1.986480712890625, 2.4062652587890625, 3.6744918823242188, 1.5413551330566406, 0.9591255187988281, 1.6495552062988281, 4.642791748046875, -0.1759796142578125, 1.6147613525390625, 0.1232147216796875, -0.28549957275390625, 1.7738456726074219, 2.3288116455078125, 0.6283988952636719], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000040.npy"}
|
|
{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.8831415176391602, "std": 2.0541796684265137, "min": -3.408966064453125, "p10": 0.0470733642578126, "median": 1.2705936431884766, "p90": 4.1527099609375, "max": 9.093711853027344, "pos_frac": 0.90625, "sample": [0.8435745239257812, 1.1566429138183594, 3.1757965087890625, 1.372650146484375, 1.0707321166992188, 2.390869140625, 3.8371124267578125, 2.4279022216796875, 0.3527679443359375, 3.527099609375, 5.4217529296875, 1.0146408081054688, 1.9016571044921875, 9.093711853027344, 0.23920822143554688, 0.006744384765625, 1.10699462890625, 1.3238487243652344, 0.37635040283203125, -1.370697021484375, 1.379150390625, 3.994251251220703, 4.712890625, 0.9140205383300781, 7.102386474609375, 1.6558723449707031, 3.80291748046875, 4.164794921875, 0.9275856018066406, -0.026340484619140625, 2.29888916015625, 1.156158447265625, 7.205291748046875, 0.8600692749023438, 1.046468734741211, 3.9195175170898438, 1.2173385620117188, 2.818084716796875, -0.0110931396484375, 2.128173828125, 4.12451171875, 1.7578105926513672, -3.408966064453125, 3.4474945068359375, 0.14117431640625, 1.01263427734375, 2.1391372680664062, 1.952545166015625, 1.6499958038330078, 0.6582736968994141, 0.9057464599609375, 1.5818862915039062, 0.1762237548828125, 0.7838363647460938, 0.7109260559082031, 0.9674320220947266, 0.30460357666015625, 3.6166954040527344, 0.591552734375, 0.3840370178222656, -0.533050537109375, -0.2678203582763672, 2.5222854614257812, 4.7663116455078125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000041.npy"}
|
|
{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.5920138359069824, "std": 2.4646682739257812, "min": -1.6232757568359375, "p10": 0.23197822570800788, "median": 2.0077552795410156, "p90": 6.042955017089844, "max": 12.5054931640625, "pos_frac": 0.9375, "sample": [2.0346221923828125, 0.20348358154296875, 1.8612308502197266, 9.709365844726562, 12.5054931640625, 2.5883026123046875, 0.5544204711914062, 5.8527984619140625, 3.1351966857910156, 3.217559814453125, 1.7773895263671875, 6.342994689941406, 3.4203567504882812, 6.9559326171875, 0.44801902770996094, 2.0608749389648438, 2.7175445556640625, 0.17005157470703125, 2.3755340576171875, 3.3638381958007812, 1.4498710632324219, 1.3575725555419922, 3.9727096557617188, 0.57293701171875, 0.7825393676757812, 2.7164955139160156, -0.27972412109375, 2.84344482421875, 2.00933837890625, -0.2594451904296875, 1.8508338928222656, 6.12445068359375, 0.2984657287597656, 6.13311767578125, 2.0061721801757812, 1.7817535400390625, 0.9259490966796875, 1.6206645965576172, 1.920806884765625, -0.48885345458984375, 3.9803466796875, 0.119293212890625, 2.0750045776367188, 6.2005615234375, 1.5237159729003906, 3.472412109375, 0.9127540588378906, 0.5166053771972656, 1.0387306213378906, 1.4536361694335938, 0.82781982421875, 5.424896240234375, -1.6232757568359375, 0.6750545501708984, 2.1170501708984375, 4.183277130126953, 0.5406303405761719, 1.4398193359375, 4.614601135253906, 2.3798065185546875, 4.2570037841796875, 5.587684631347656, 0.9409656524658203, 4.59637451171875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000042.npy"}
|
|
{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.515774726867676, "std": 2.0010929107666016, "min": -1.4007415771484375, "p10": 0.18395519256591802, "median": 2.4800186157226562, "p90": 4.601935577392578, "max": 9.7196044921875, "pos_frac": 0.96875, "sample": [0.3124351501464844, 0.044208526611328125, 0.06696319580078125, 3.6884613037109375, 4.105369567871094, 0.8073577880859375, 3.3774261474609375, 2.542755126953125, 2.8227767944335938, 6.830718994140625, 0.7324676513671875, 1.9257278442382812, 0.7669467926025391, 2.224884033203125, 3.5120201110839844, 0.9477462768554688, 3.522705078125, 5.7416839599609375, 2.0051498413085938, 0.3210010528564453, 6.714141845703125, 2.832244873046875, 5.264167785644531, 0.8362960815429688, 3.0937156677246094, 2.47296142578125, 0.094696044921875, 2.4214744567871094, 0.045196533203125, 4.453643798828125, 1.4868545532226562, 2.726408004760742, 3.795928955078125, 2.991180419921875, 2.8284835815429688, 3.7487411499023438, 0.73101806640625, 2.495685577392578, 1.8821048736572266, 0.1632843017578125, 1.3307113647460938, 2.088165283203125, 6.9962615966796875, 3.7420120239257812, 1.3177871704101562, -1.4007415771484375, 1.246694564819336, 3.104339599609375, 2.4870758056640625, 4.0979461669921875, 2.964550018310547, 3.7060928344726562, 2.3112411499023438, 0.42194557189941406, 1.8711166381835938, 9.7196044921875, 4.146087646484375, 1.7807502746582031, -0.00263214111328125, 0.37209510803222656, 2.5697174072265625, 4.665489196777344, 3.8640594482421875, 0.23218727111816406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000043.npy"}
|
|
{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.7573585510253906, "std": 2.2134153842926025, "min": -1.0622215270996094, "p10": 0.3305505752563477, "median": 2.3011474609375, "p90": 5.552581787109376, "max": 9.81756591796875, "pos_frac": 0.96875, "sample": [4.340572357177734, 2.2477264404296875, 2.3145751953125, 0.4092998504638672, 6.737216949462891, 2.5703964233398438, 3.2394638061523438, 1.384490966796875, 2.2072181701660156, 5.3164215087890625, 6.80181884765625, 0.5250892639160156, 4.4973602294921875, 4.964202880859375, 5.151641845703125, 3.8811569213867188, 0.8417816162109375, 2.828826904296875, 2.062702178955078, 5.6537933349609375, 0.33957481384277344, 2.9046173095703125, 2.850341796875, 7.0893402099609375, 2.167816162109375, 2.9837722778320312, 0.0100250244140625, 1.9101409912109375, 5.110504150390625, 1.7647953033447266, 2.5907363891601562, 0.05268669128417969, 1.8043479919433594, 0.9849662780761719, 1.5053234100341797, 3.1518630981445312, 0.02442169189453125, 2.0114593505859375, 5.09912109375, 2.2877197265625, 0.7976226806640625, 2.314697265625, 3.8087310791015625, 3.3058853149414062, 1.2537651062011719, 3.2087554931640625, 3.296417236328125, 2.0689849853515625, 0.6418437957763672, 0.32668304443359375, 6.4117431640625, 4.1819610595703125, 1.95440673828125, 3.471038818359375, 0.7153034210205078, 9.81756591796875, 0.8090019226074219, -0.7215118408203125, 3.061187744140625, 1.6783218383789062, -1.0622215270996094, 0.10811424255371094, 1.7049674987792969, 8.698341369628906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000044.npy"}
|
|
{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.2421412467956543, "std": 2.5386264324188232, "min": -2.0013961791992188, "p10": -0.2149482727050781, "median": 1.699789047241211, "p90": 6.063967895507813, "max": 11.78765869140625, "pos_frac": 0.84375, "sample": [3.3209609985351562, 1.772369384765625, 0.8273849487304688, 2.8310203552246094, 0.4220161437988281, 1.5307788848876953, 3.5592498779296875, 6.3007354736328125, 4.6724700927734375, 2.710601806640625, 0.5011444091796875, 7.53271484375, 3.537363052368164, 0.3037567138671875, 0.11822128295898438, 1.9737472534179688, -0.367645263671875, 1.7096061706542969, 0.11700248718261719, 5.938724517822266, -0.2275848388671875, 0.30790138244628906, 2.37530517578125, 0.4984703063964844, 0.7797031402587891, 5.545724868774414, 6.585075378417969, 4.447425842285156, 0.1573333740234375, 0.39614295959472656, 4.592437744140625, 2.6032485961914062, 6.0642242431640625, -0.0770111083984375, -0.27080535888671875, 1.390045166015625, 6.301483154296875, 3.9151992797851562, -0.18546295166015625, 1.689971923828125, 1.3543128967285156, 0.19313812255859375, -2.0013961791992188, -0.9620285034179688, 1.19476318359375, 11.78765869140625, 1.8166580200195312, 6.748908996582031, 3.273681640625, -0.8217315673828125, 2.290191650390625, 1.3100357055664062, 1.8651390075683594, 0.4128093719482422, 1.584238052368164, 6.0633697509765625, 1.5720367431640625, 2.2590179443359375, 2.106718063354492, 2.0669479370117188, -0.1612720489501953, 3.8703994750976562, 0.5641384124755859, -1.0917549133300781], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000045.npy"}
|
|
{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.8972856998443604, "std": 3.2219743728637695, "min": -2.8835830688476562, "p10": 0.01476325988769532, "median": 2.140162467956543, "p90": 6.554290008544922, "max": 15.4105224609375, "pos_frac": 0.90625, "sample": [0.4691486358642578, 0.6594924926757812, 5.3415374755859375, 7.504180908203125, 3.936859130859375, -2.8835830688476562, 1.4215888977050781, 1.7932415008544922, 6.580078125, 3.580472946166992, 6.305034637451172, 1.6275463104248047, 0.2256317138671875, 2.355375289916992, 5.11065673828125, 0.022579193115234375, -0.0601654052734375, 0.5389251708984375, 4.177558898925781, 11.84149169921875, -0.5144634246826172, 0.01141357421875, -0.4805717468261719, 3.7879486083984375, 0.3801383972167969, 3.9579620361328125, 4.6811065673828125, 1.490509033203125, 4.126468658447266, 2.2845630645751953, 4.340263366699219, 0.24698257446289062, -2.795257568359375, 0.8200302124023438, 3.3795623779296875, 5.835273742675781, 5.414276123046875, 0.20990753173828125, 7.871192932128906, 0.14644622802734375, 0.16956329345703125, 3.4140853881835938, 5.131874084472656, 0.2533416748046875, 15.4105224609375, 0.3400993347167969, 6.8524932861328125, 5.683990478515625, 0.6145439147949219, 2.7701568603515625, 1.7484245300292969, 8.723220825195312, 3.0347442626953125, 0.3621635437011719, 1.9957618713378906, 6.494117736816406, 1.3138084411621094, 2.6370391845703125, 3.005126953125, 5.916664123535156, 1.7833328247070312, 0.8947772979736328, 1.7184066772460938, -0.5833816528320312], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000046.npy"}
|
|
{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 3.0041422843933105, "std": 2.786167621612549, "min": -2.4296951293945312, "p10": 0.10164737701416017, "median": 2.225740432739258, "p90": 6.859772491455079, "max": 10.965087890625, "pos_frac": 0.90625, "sample": [2.2626113891601562, 3.3261642456054688, 0.21726226806640625, 1.2758922576904297, 5.063350677490234, 1.5527725219726562, 1.36669921875, 1.7375411987304688, 7.308277130126953, 2.8736095428466797, 0.09767913818359375, 6.688743591308594, 6.7170562744140625, 1.2465744018554688, 3.3008651733398438, 2.467639923095703, -0.20012664794921875, 1.579925537109375, 1.7332839965820312, 5.094474792480469, 6.920936584472656, 1.3554000854492188, 0.7457771301269531, 3.376588821411133, 4.105873107910156, 1.5227165222167969, -0.7369537353515625, 7.332305908203125, 7.468902587890625, 2.6939849853515625, -0.1960906982421875, 3.688610076904297, 6.055057525634766, 4.880577087402344, -2.4296951293945312, 0.11090660095214844, 0.14351654052734375, 1.0534114837646484, 0.6642990112304688, 10.965087890625, 5.9459075927734375, 1.9372615814208984, 1.3566341400146484, 2.68701171875, 9.097274780273438, 6.575927734375, 1.0461502075195312, 3.5354480743408203, 10.625228881835938, -1.2335433959960938, 1.4294509887695312, 2.0360260009765625, -0.3353729248046875, 1.6852874755859375, 2.4666976928710938, 1.3960723876953125, 5.3659820556640625, 1.820037841796875, 4.148345947265625, 1.9020442962646484, 4.781768798828125, 4.1121673583984375, 2.262918472290039, 2.1888694763183594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000047.npy"}
|
|
{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 3.0878491401672363, "std": 3.2279679775238037, "min": -8.221755981445312, "p10": 0.07244377136230479, "median": 2.4346399307250977, "p90": 7.549639892578127, "max": 12.372177124023438, "pos_frac": 0.921875, "sample": [3.6817779541015625, 0.9157867431640625, 7.720916748046875, 8.79541015625, 7.149993896484375, 2.809955596923828, -0.9242401123046875, 2.3912124633789062, 5.65167236328125, 4.5514373779296875, 1.5755157470703125, 6.247344970703125, 2.2378692626953125, 1.0306549072265625, 0.17364501953125, 1.0487480163574219, 7.09759521484375, -0.0817108154296875, 0.8285903930664062, 0.029071807861328125, 1.2993354797363281, 8.983680725097656, -0.46018409729003906, 2.0756988525390625, 6.0327606201171875, 1.702728271484375, 3.6628341674804688, 3.7397708892822266, 0.0140380859375, 3.2350826263427734, 1.9434776306152344, 1.8164386749267578, 4.2558441162109375, 2.478067398071289, 9.316390991210938, 3.6682186126708984, 5.247833251953125, 10.337715148925781, 3.77862548828125, 1.5253181457519531, 0.7599868774414062, 1.3470325469970703, 0.795806884765625, 2.7630043029785156, 0.872650146484375, 4.079685211181641, 3.0239429473876953, 1.8866424560546875, -1.7344188690185547, 8.387519836425781, 2.7461929321289062, 3.568347930908203, 1.9766845703125, 12.372177124023438, 5.155403137207031, -8.221755981445312, 5.639915466308594, 1.7676849365234375, 0.9575004577636719, 0.35882568359375, 4.9755401611328125, 1.8938484191894531, 2.699634552001953, 1.9655609130859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000048.npy"}
|
|
{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.8576841354370117, "std": 4.1148786544799805, "min": -3.9347991943359375, "p10": -0.16564731597900373, "median": 3.4972152709960938, "p90": 8.18700180053711, "max": 17.35400390625, "pos_frac": 0.890625, "sample": [3.5714645385742188, 0.8305244445800781, 0.8225517272949219, 3.5837326049804688, 2.4979209899902344, 6.216648101806641, 0.24434661865234375, 2.983793258666992, -2.3425445556640625, 0.5142974853515625, 0.1263904571533203, 2.313129425048828, 3.3640594482421875, 9.636520385742188, 7.879608154296875, 3.37518310546875, 4.259555816650391, 2.487344741821289, 4.493782043457031, 1.751434326171875, 2.236295700073242, -3.2611923217773438, 3.7143478393554688, -0.24391746520996094, 14.960052490234375, 11.1241455078125, 1.5462112426757812, 5.903720855712891, 4.2733001708984375, 0.7080287933349609, 7.930381774902344, 17.35400390625, 4.511566162109375, 0.8931808471679688, -1.7258071899414062, -3.9347991943359375, 3.7941055297851562, 3.9572715759277344, 4.604358673095703, 4.5731658935546875, 3.306009292602539, 0.0169830322265625, -1.3497314453125, 4.750835418701172, 5.1649932861328125, 15.22906494140625, 2.7613525390625, 2.4238204956054688, 8.296981811523438, 7.41107177734375, 6.52130126953125, 3.4229660034179688, 5.108673095703125, 5.666816711425781, 13.148345947265625, 0.8719539642333984, -0.2905120849609375, 3.5838470458984375, 1.966634750366211, 4.0374755859375, 5.9310760498046875, 2.0178909301757812, 4.4149627685546875, 0.9508056640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000049.npy"}
|
|
{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 4.085658073425293, "std": 3.9761579036712646, "min": -2.955495834350586, "p10": -0.6736808776855469, "median": 3.277088165283203, "p90": 9.451666641235354, "max": 14.658309936523438, "pos_frac": 0.84375, "sample": [7.668548583984375, -0.5122909545898438, -2.0373992919921875, 10.453628540039062, 1.4476814270019531, 3.173095703125, 7.870647430419922, 2.0386428833007812, 3.432270050048828, 1.903071403503418, -0.8531723022460938, 7.2229766845703125, 3.816741943359375, 14.48297119140625, 0.423431396484375, 2.955078125, 7.5774383544921875, 3.1700687408447266, 10.54547119140625, 4.6043701171875, 0.8765354156494141, 5.101104736328125, 9.804679870605469, 2.452880859375, 2.1061019897460938, 5.2591705322265625, 0.028224945068359375, -0.6912612915039062, 3.0403804779052734, 9.0426025390625, 6.5758209228515625, 5.678092956542969, 9.62697982788086, 14.658309936523438, -2.955495834350586, 0.5869235992431641, 1.1647872924804688, 1.9544448852539062, 0.2006816864013672, 3.7347640991210938, 2.0000152587890625, 1.8002147674560547, 6.8501434326171875, 4.532798767089844, 7.906345367431641, 6.984779357910156, -0.632659912109375, 2.0678939819335938, 5.75592041015625, -0.85968017578125, 6.1024169921875, 3.3810806274414062, 4.571441650390625, 6.751792907714844, 2.2906150817871094, 6.232765197753906, 8.056381225585938, 2.3498001098632812, 12.252571105957031, -1.0369338989257812, 7.133087158203125, 1.6687850952148438, -2.254241943359375, -0.052219390869140625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000050.npy"}
|
|
{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 3.9936554431915283, "std": 5.230474948883057, "min": -6.4423828125, "p10": -0.7762001037597654, "median": 2.739543914794922, "p90": 10.26396942138672, "max": 21.270645141601562, "pos_frac": 0.828125, "sample": [9.84649658203125, 4.214393615722656, 0.8411712646484375, -0.9138221740722656, 0.7355499267578125, 0.026611328125, 9.392082214355469, -0.5754051208496094, 1.7112503051757812, -0.189056396484375, 11.78033447265625, 8.002437591552734, 2.2941436767578125, 4.390571594238281, 2.3078651428222656, 4.8390350341796875, 4.148151397705078, -1.2911872863769531, 10.442886352539062, 6.605777740478516, 4.332582473754883, -0.953125, 0.0195465087890625, 3.9593658447265625, 0.7625656127929688, 7.88348388671875, 0.639007568359375, 6.461277008056641, 0.7295875549316406, 5.654014587402344, 21.270645141601562, 3.0466747283935547, 0.3275947570800781, 5.398895263671875, 3.5117263793945312, 15.163040161132812, 1.4156570434570312, 15.841949462890625, 5.137855529785156, 2.1895828247070312, 2.4465789794921875, 0.6805076599121094, 3.936267852783203, 9.488204956054688, 1.2089500427246094, 0.24602890014648438, 5.8630218505859375, -4.1170654296875, -0.2843761444091797, 18.852676391601562, 14.06219482421875, 7.0692138671875, 7.644428253173828, -0.8622550964355469, 1.533285140991211, 6.754798889160156, 0.007844924926757812, 0.1615142822265625, 2.1605072021484375, -6.4423828125, 3.0325088500976562, -2.0386962890625, 3.2540283203125, -0.465057373046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000051.npy"}
|
|
{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 6.10667610168457, "std": 5.13358736038208, "min": -2.4741363525390625, "p10": 0.7473766326904298, "median": 5.011199951171875, "p90": 13.097531127929688, "max": 22.4481201171875, "pos_frac": 0.90625, "sample": [1.6591644287109375, 12.450233459472656, 4.389839172363281, 13.01043701171875, 7.416328430175781, 11.038665771484375, 2.9737701416015625, 7.554443359375, 7.42694091796875, 14.175689697265625, -0.30821990966796875, 13.2200927734375, 5.030372619628906, 6.2530975341796875, 9.273019790649414, 8.359991073608398, 2.226287841796875, 5.268796920776367, 17.543190002441406, 9.518402099609375, 3.2830467224121094, 1.11920166015625, 12.016700744628906, 3.190418243408203, -0.015674591064453125, 7.929527282714844, 3.1806793212890625, 1.9121437072753906, 9.109209060668945, 22.4481201171875, 10.36956787109375, 1.887847900390625, 11.180267333984375, 5.172782897949219, 4.0601654052734375, 13.806427001953125, 4.431526184082031, -1.30218505859375, 4.992027282714844, -2.28887939453125, -2.4741363525390625, 2.2409439086914062, 6.608724594116211, 5.697746276855469, 2.3130950927734375, 10.05755615234375, 4.421529769897461, 13.134857177734375, 7.688385009765625, 1.8335914611816406, 0.7170143127441406, 3.3550262451171875, 3.6994552612304688, 4.893558502197266, 3.2333831787109375, 10.084484100341797, 6.408439636230469, 0.8182220458984375, 18.08966827392578, -1.6133270263671875, 6.565940856933594, 4.147312164306641, 2.576730728149414, 1.365621566772461], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000052.npy"}
|
|
{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 6.196445941925049, "std": 6.871194839477539, "min": -3.1633262634277344, "p10": 0.3042129516601565, "median": 4.06745719909668, "p90": 16.018505859375, "max": 30.336044311523438, "pos_frac": 0.921875, "sample": [-1.4353656768798828, 3.5198135375976562, 4.0748291015625, 1.790496826171875, 0.83929443359375, 4.060085296630859, 14.065834045410156, 0.15365219116210938, 8.268470764160156, -2.37689208984375, 2.0766448974609375, 29.40130615234375, -3.1633262634277344, 0.21463775634765625, 6.806846618652344, 7.980625152587891, 1.695037841796875, 11.848426818847656, 3.0850677490234375, 12.869392395019531, 4.225788116455078, 5.9315948486328125, 12.946502685546875, 7.08795166015625, 16.81926727294922, 1.3947906494140625, 5.1578521728515625, 0.7843475341796875, 5.662540435791016, 1.5917949676513672, 18.491607666015625, 8.814228057861328, 7.359477996826172, 8.273155212402344, 3.8950157165527344, 0.5132217407226562, 0.9365081787109375, 4.327419281005859, 1.0035057067871094, 4.488222122192383, 3.349090576171875, 8.252416610717773, 1.4817581176757812, 15.355484008789062, -2.9245834350585938, 18.791580200195312, 18.593505859375, 3.348705291748047, 5.407958984375, 2.993610382080078, 2.1655025482177734, 4.5207366943359375, 3.0412979125976562, 1.6802215576171875, 4.016426086425781, 4.011173248291016, 3.229921340942383, 30.336044311523438, 2.8242874145507812, 16.22442626953125, 5.061382293701172, 4.422904968261719, 15.53802490234375, -0.6290054321289062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000053.npy"}
|
|
{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 5.335931301116943, "std": 5.094736099243164, "min": -1.0011577606201172, "p10": 0.15695953369140628, "median": 3.8762474060058594, "p90": 12.682405090332036, "max": 21.222320556640625, "pos_frac": 0.953125, "sample": [7.599365234375, 5.6425323486328125, 9.180679321289062, 1.0437641143798828, 1.1417579650878906, 2.680267333984375, 0.12120437622070312, 2.594013214111328, 6.595405578613281, 8.229991912841797, 1.0102176666259766, 6.206085205078125, 13.196212768554688, 2.1870899200439453, 3.9702911376953125, 11.30364990234375, 0.04555511474609375, 5.241783142089844, 5.213298797607422, 5.647186279296875, 0.148834228515625, 1.63177490234375, 17.420249938964844, 0.00069427490234375, 7.1273651123046875, 0.1759185791015625, 4.899684906005859, 4.772552490234375, 6.5252532958984375, 6.690219879150391, 1.8901023864746094, -0.47708892822265625, 7.665031433105469, -0.060123443603515625, 1.2945537567138672, 3.7822036743164062, 13.222587585449219, 2.5120391845703125, 6.4526824951171875, 13.948394775390625, 1.813161849975586, 2.584674835205078, 21.222320556640625, 11.045455932617188, -1.0011577606201172, 2.9882755279541016, 0.4490318298339844, 13.71197509765625, 0.5721988677978516, 3.7042999267578125, 1.2848129272460938, 4.587625503540039, 11.006317138671875, 1.8868408203125, 2.62701416015625, 19.678863525390625, 1.2250137329101562, 4.690517425537109, 1.8636627197265625, 11.4610595703125, 2.1625843048095703, 10.665771484375, 1.3124771118164062, 11.4835205078125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000054.npy"}
|
|
{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 6.3703436851501465, "std": 7.454952716827393, "min": -11.680877685546875, "p10": -1.4643070220947265, "median": 5.5335893630981445, "p90": 16.664465332031252, "max": 28.782196044921875, "pos_frac": 0.8125, "sample": [7.536415100097656, 9.836265563964844, 4.296134948730469, -4.86944580078125, 25.3643798828125, -1.9603347778320312, 6.824741363525391, 4.465999603271484, 17.684783935546875, -0.8387451171875, 6.634193420410156, 6.091121673583984, 2.3134307861328125, 10.815208435058594, 20.036346435546875, 9.930007934570312, 4.5458526611328125, 9.853958129882812, 28.782196044921875, 3.5940818786621094, 11.703689575195312, -1.340301513671875, 4.224151611328125, 8.80886459350586, 23.356521606445312, 0.10161781311035156, 16.229827880859375, 0.425445556640625, 0.7026615142822266, 8.427066802978516, 4.9890899658203125, -3.074066162109375, 6.998882293701172, 0.2716026306152344, 12.917709350585938, 4.38189697265625, -1.5174522399902344, -0.3651313781738281, 0.3590660095214844, -5.535835266113281, 9.872673034667969, 3.0422706604003906, 2.421844482421875, 15.854591369628906, 6.7823028564453125, -1.5709075927734375, 0.7822113037109375, 11.094528198242188, 5.66124153137207, -11.680877685546875, 3.404111862182617, 16.850738525390625, 7.117942810058594, 4.505062103271484, 7.765727996826172, 6.183509826660156, -0.9472274780273438, 7.769935607910156, 7.47882080078125, 14.361770629882812, 5.405937194824219, -0.11018753051757812, 5.322059631347656, 17.332015991210938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000055.npy"}
|
|
{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 6.090175628662109, "std": 7.333968162536621, "min": -10.272052764892578, "p10": -1.7979156494140622, "median": 5.430574417114258, "p90": 16.190589904785163, "max": 27.434188842773438, "pos_frac": 0.796875, "sample": [-2.4725341796875, 7.15020751953125, -4.3168792724609375, 9.158428192138672, -0.6341552734375, -10.272052764892578, 10.684356689453125, -1.5947494506835938, 3.4669113159179688, -3.5801620483398438, 3.6470870971679688, 1.9619140625, 5.666023254394531, 17.126708984375, 2.5951194763183594, 0.2494964599609375, -0.5906867980957031, 11.387809753417969, 10.440967559814453, 8.576873779296875, -1.8849868774414062, 11.830337524414062, 3.0938491821289062, 23.512664794921875, 9.454681396484375, 17.106689453125, 1.2360897064208984, 0.2974262237548828, 9.78609848022461, 2.7420272827148438, 7.337089538574219, 7.8780975341796875, 10.014572143554688, 1.6518287658691406, 11.104736328125, 12.04302978515625, -0.054714202880859375, 19.938888549804688, 7.696205139160156, 6.9271240234375, 5.932216644287109, 4.806194305419922, 3.0794429779052734, 20.095306396484375, 7.3388824462890625, 13.175041198730469, 6.195056915283203, 10.76495361328125, 0.9259872436523438, 27.434188842773438, 1.1261730194091797, -0.0019283294677734375, 3.96929931640625, 14.053024291992188, -7.4898834228515625, 3.4214134216308594, -0.5986785888671875, 21.70539093017578, 5.195125579833984, -3.8169937133789062, 3.7952041625976562, 1.9076004028320312, 6.267314910888672, 10.128463745117188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000056.npy"}
|
|
{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 6.775736331939697, "std": 6.503594875335693, "min": -10.11834716796875, "p10": 0.33493595123291015, "median": 6.557384490966797, "p90": 15.12040710449219, "max": 23.683395385742188, "pos_frac": 0.921875, "sample": [9.259212493896484, 3.315044403076172, 0.3317413330078125, -2.573760986328125, 10.81903076171875, -10.11834716796875, 12.270126342773438, 0.3423900604248047, 14.674407958984375, 6.473228454589844, 17.251815795898438, 3.659137725830078, 23.683395385742188, 3.2110595703125, 6.7128143310546875, 7.19183349609375, -1.1799030303955078, 3.3476524353027344, 6.64154052734375, 21.1097412109375, 10.916091918945312, 17.056076049804688, 11.936172485351562, 3.7368736267089844, 6.894147872924805, 1.5678329467773438, 14.815139770507812, 5.103792190551758, 3.7732295989990234, 2.9478607177734375, 0.86456298828125, 4.003458023071289, 3.4278106689453125, 0.08352851867675781, 7.112213134765625, 8.506393432617188, 0.4589996337890625, 1.5539894104003906, 2.505767822265625, 9.788314819335938, 11.697738647460938, 2.057180404663086, 10.07843017578125, 5.2022247314453125, 9.8831787109375, -2.999786376953125, 8.742485046386719, 6.7803802490234375, 9.341522216796875, 3.9409847259521484, 2.685455322265625, 18.4754638671875, 5.384746551513672, 3.97021484375, 7.5888519287109375, 4.512092590332031, 0.8017730712890625, 13.233230590820312, 13.267082214355469, -7.078117370605469, 15.251235961914062, 21.377151489257812, 8.032411575317383, 7.946784973144531], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000057.npy"}
|
|
{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 7.584902763366699, "std": 8.055214881896973, "min": -9.197669982910156, "p10": -0.049985504150390575, "median": 5.583106994628906, "p90": 18.31896591186524, "max": 29.879974365234375, "pos_frac": 0.875, "sample": [15.940887451171875, -2.398855209350586, 0.4338359832763672, 4.879573822021484, 4.44854736328125, 4.442081451416016, 20.916725158691406, 1.773895263671875, 15.546577453613281, 2.762493133544922, 6.402355194091797, 7.154457092285156, 7.942264556884766, 4.190385818481445, 2.3587112426757812, 13.073974609375, 11.55462646484375, 9.559883117675781, 4.63237190246582, 2.5474987030029297, 8.236114501953125, 15.216049194335938, 14.428794860839844, -2.2139205932617188, 12.96795654296875, 1.6401519775390625, 9.981575012207031, 6.938257217407227, 23.278823852539062, 4.556543350219727, 29.879974365234375, 8.887016296386719, 6.286640167236328, -0.00020599365234375, 15.960174560546875, 1.0692100524902344, 15.784294128417969, 4.307430267333984, 3.29058837890625, -1.6769256591796875, 2.3692169189453125, 2.8406219482421875, 25.00152587890625, -2.3129196166992188, 10.140869140625, -0.071319580078125, 1.3627262115478516, 8.007171630859375, -6.6688232421875, 25.715362548828125, 1.9683570861816406, 6.947731018066406, 0.8199825286865234, 15.024024963378906, 11.523082733154297, -9.197669982910156, 23.435928344726562, 2.795318603515625, 2.786895751953125, 7.666587829589844, 16.349334716796875, 1.7848644256591797, 1.0009574890136719, 19.16309356689453], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000058.npy"}
|
|
{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 8.145567893981934, "std": 8.525816917419434, "min": -8.147247314453125, "p10": -1.575761795043945, "median": 7.083414077758789, "p90": 20.996388244628907, "max": 34.13417053222656, "pos_frac": 0.875, "sample": [8.88677978515625, 9.360694885253906, 16.83563232421875, 21.17523193359375, 17.39960479736328, 4.451152801513672, 34.13417053222656, 22.292205810546875, 7.450450897216797, 17.352981567382812, 0.8142929077148438, 7.687938690185547, -4.361236572265625, 2.8635387420654297, 24.759918212890625, -1.4094429016113281, 2.506244659423828, 15.144241333007812, 6.489738464355469, 21.48058319091797, 1.1849594116210938, 2.2383499145507812, 0.7513427734375, 1.4613304138183594, 5.027671813964844, 27.39111328125, 2.4735965728759766, 2.0588531494140625, 8.845401763916016, -8.147247314453125, 10.084762573242188, -2.3654251098632812, 8.397560119628906, 8.586822509765625, 9.671455383300781, 19.981849670410156, 3.4168472290039062, 12.960418701171875, 7.05950927734375, 4.2593536376953125, -6.074806213378906, -3.7109222412109375, 6.35148811340332, -1.6470413208007812, 4.728673934936523, 10.486824035644531, 7.449167251586914, 3.171905517578125, 8.361579895019531, 2.921497344970703, 1.8458881378173828, 7.107318878173828, 3.341276168823242, 11.06838607788086, 6.222969055175781, 15.401786804199219, 10.15240478515625, 4.153984069824219, 3.7358856201171875, -3.1652297973632812, 18.743446350097656, 20.579086303710938, 7.646766662597656, 21.790740966796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000059.npy"}
|
|
{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 6.531972885131836, "std": 7.570163726806641, "min": -15.8360595703125, "p10": -2.0786291122436524, "median": 7.122047424316406, "p90": 13.552742385864258, "max": 27.54016876220703, "pos_frac": 0.828125, "sample": [8.532096862792969, -5.46905517578125, 2.0833187103271484, 12.754966735839844, 0.5923919677734375, 3.4095916748046875, 12.264785766601562, 8.772598266601562, 1.6288833618164062, -15.8360595703125, 7.202262878417969, -1.4729461669921875, 2.0380115509033203, 4.092462539672852, 12.050910949707031, 6.821136474609375, 4.092376708984375, 11.554023742675781, 5.1298065185546875, 11.458351135253906, 13.3575439453125, 13.606063842773438, 21.029083251953125, 9.433609008789062, 9.476661682128906, 8.009605407714844, -0.9448814392089844, 7.349449157714844, 11.75421142578125, 0.16017723083496094, 10.262065887451172, 20.105865478515625, 3.6827964782714844, 6.876453399658203, 8.517948150634766, -2.0503787994384766, 11.441116333007812, 8.952255249023438, 2.8113021850585938, -6.309120178222656, 13.428325653076172, 3.4703636169433594, 5.6298828125, 9.398811340332031, 4.5316009521484375, -1.1878585815429688, -11.578533172607422, 17.855865478515625, 22.58782958984375, 12.193511962890625, 13.632286071777344, 3.3780441284179688, -2.9774208068847656, -2.0907363891601562, 11.406301498413086, 7.041831970214844, 27.54016876220703, 10.578964233398438, 1.445831298828125, 9.998868942260742, -2.1269912719726562, 8.70245361328125, 4.816770553588867, 1.1483402252197266], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000060.npy"}
|
|
{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 8.05933666229248, "std": 11.084214210510254, "min": -10.902458190917969, "p10": -1.657727813720703, "median": 5.835548400878906, "p90": 20.50462875366211, "max": 62.00537109375, "pos_frac": 0.78125, "sample": [2.8493385314941406, 1.647256851196289, -0.845733642578125, 1.3033676147460938, 15.547859191894531, -0.3424549102783203, -0.10154533386230469, 8.436885833740234, -1.8141098022460938, 29.592681884765625, 23.784332275390625, 20.989639282226562, 27.989700317382812, 14.488349914550781, 8.986404418945312, 10.115413665771484, -2.1258926391601562, 1.9036884307861328, 12.243194580078125, 12.84674072265625, 3.2078323364257812, 5.560214996337891, 17.206863403320312, 9.844097137451172, 6.821540832519531, 7.443321228027344, 6.110881805419922, 6.4460906982421875, 9.879707336425781, 9.213211059570312, -0.42742156982421875, -1.044952392578125, 13.689613342285156, 17.516067504882812, 18.55645751953125, 24.452880859375, 19.777023315429688, 1.9848861694335938, -1.688262939453125, 14.414932250976562, 2.613018035888672, 0.23664474487304688, 14.425064086914062, 3.1195335388183594, 1.096933364868164, 20.81645965576172, 0.6934738159179688, 3.5310211181640625, 62.00537109375, 14.318763732910156, 13.665046691894531, -1.5864791870117188, -10.902458190917969, 13.223854064941406, 2.7327194213867188, -5.717842102050781, 5.374824523925781, 3.97039794921875, -8.267631530761719, 2.7848663330078125, 7.591552734375, -6.386798858642578, -1.3707923889160156, 1.3698978424072266], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000061.npy"}
|
|
{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 6.027884006500244, "std": 7.976193904876709, "min": -10.371368408203125, "p10": -3.679120826721191, "median": 5.627972602844238, "p90": 16.619368362426762, "max": 32.38800048828125, "pos_frac": 0.859375, "sample": [9.68368911743164, 1.5255851745605469, 0.5298061370849609, 11.205802917480469, 5.813549041748047, 12.484222412109375, 7.759967803955078, 1.0753822326660156, 10.01983642578125, -6.1710357666015625, 15.898895263671875, 6.984661102294922, 1.954305648803711, 0.3963890075683594, 10.388053894042969, 3.8531646728515625, 10.555084228515625, 6.413787841796875, 0.04622077941894531, 5.309856414794922, 7.6353912353515625, 7.807399749755859, 1.9876632690429688, 5.799642562866211, -4.803009033203125, 9.58843994140625, 4.577486038208008, 9.402931213378906, -0.4014091491699219, 32.38800048828125, 18.151329040527344, 9.041305541992188, 6.810638427734375, 2.2802734375, 1.4131088256835938, 6.3823394775390625, 1.5691757202148438, 19.387252807617188, -10.371368408203125, -5.316398620605469, -7.5575408935546875, 0.9218406677246094, 1.3309745788574219, 0.9523468017578125, 1.7450180053710938, 9.60572624206543, 16.928142547607422, 10.195438385009766, -3.874074935913086, 17.264923095703125, 9.362430572509766, 5.456302642822266, -7.5995330810546875, 3.756114959716797, 14.137870788574219, 4.1671600341796875, 3.3599472045898438, 3.8924636840820312, 28.957962036132812, 22.037612915039062, 7.445917129516602, 0.8918304443359375, -3.2242279052734375, 6.5725250244140625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000062.npy"}
|
|
{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 8.255392074584961, "std": 8.680863380432129, "min": -14.340324401855469, "p10": -0.5687894821166986, "median": 7.90765380859375, "p90": 18.07299194335938, "max": 35.685272216796875, "pos_frac": 0.890625, "sample": [26.978981018066406, 8.697563171386719, 4.781335830688477, 9.812889099121094, 3.30731201171875, 1.5037155151367188, 5.891395568847656, -7.9737548828125, 7.8908843994140625, 7.1635589599609375, -14.340324401855469, 16.967567443847656, 9.422508239746094, 7.9244232177734375, 3.085153579711914, -0.8845119476318359, 18.889556884765625, 4.744159698486328, 14.740341186523438, 21.32720184326172, 13.408546447753906, 14.024200439453125, 6.993827819824219, 0.16789627075195312, -0.9308319091796875, 12.173635482788086, -3.9967117309570312, 0.20784378051757812, 6.129234313964844, 3.485513687133789, 13.260726928710938, 9.212677001953125, 7.775764465332031, 10.054805755615234, 12.439155578613281, 13.31329345703125, 15.516273498535156, 15.048622131347656, 5.630729675292969, 2.2515945434570312, 0.5880889892578125, 14.520614624023438, 2.7602901458740234, 5.448692321777344, -12.50750732421875, 14.56574821472168, 8.782012939453125, 1.66632080078125, 2.635528564453125, 8.034317016601562, 3.70562744140625, 15.278648376464844, 20.18560791015625, 8.135307312011719, 18.54674530029297, 1.0485649108886719, 9.490346908569336, 35.685272216796875, -2.8606796264648438, 15.958576202392578, 5.0360870361328125, 27.465293884277344, 5.323188781738281, 12.755661010742188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000063.npy"}
|
|
{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 9.568205833435059, "std": 11.05217456817627, "min": -19.596954345703125, "p10": -1.204046630859375, "median": 8.09437370300293, "p90": 23.031072998046877, "max": 52.62152099609375, "pos_frac": 0.828125, "sample": [1.6201019287109375, 18.935081481933594, 29.710922241210938, 8.606582641601562, 5.590099334716797, 14.055862426757812, 3.1501598358154297, 17.554153442382812, -0.8387737274169922, 7.367826461791992, 2.3367996215820312, 11.474067687988281, 0.8245658874511719, 5.1730499267578125, 12.717376708984375, 33.29429626464844, 4.920963287353516, -1.8458232879638672, 19.602706909179688, 7.9199981689453125, 12.142520904541016, -3.3052291870117188, 30.922119140625, 8.268749237060547, 0.05966758728027344, 5.796512603759766, -1.3930339813232422, 7.632579803466797, -19.596954345703125, -0.6673755645751953, 8.272979736328125, -1.3735389709472656, 52.62152099609375, 5.915468215942383, 1.7342967987060547, 11.615283966064453, 18.21527099609375, 22.564605712890625, 11.621307373046875, 6.835567474365234, 6.185770034790039, 24.618850708007812, 13.005302429199219, 9.807762145996094, -1.220306396484375, -11.155311584472656, 26.000015258789062, 3.477081298828125, 23.230987548828125, 1.072509765625, 16.325347900390625, 13.730789184570312, 13.544792175292969, -1.166107177734375, 13.36212158203125, -0.16375350952148438, 5.522075653076172, 8.453163146972656, 15.197509765625, 13.185195922851562, 1.7953720092773438, 12.270401000976562, 19.306785583496094, 5.926532745361328], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000064.npy"}
|
|
{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 9.581817626953125, "std": 10.818757057189941, "min": -25.235275268554688, "p10": -0.6132884979248046, "median": 8.034042358398438, "p90": 24.91547164916993, "max": 33.175201416015625, "pos_frac": 0.859375, "sample": [33.175201416015625, 26.873992919921875, 19.425491333007812, 12.64300537109375, 6.221746444702148, 9.652603149414062, 0.4937744140625, 15.105857849121094, 3.1470298767089844, 21.94884490966797, 17.218955993652344, 1.7868518829345703, -0.2223663330078125, 25.72344970703125, 30.772323608398438, 0.9726982116699219, 10.736076354980469, 2.5719032287597656, -4.940086364746094, 5.494590759277344, 1.1494636535644531, 19.775360107421875, 6.4012451171875, -25.235275268554688, 18.225723266601562, 9.2117919921875, -4.080230712890625, 7.814094543457031, 14.4041748046875, 5.510898590087891, 5.72186279296875, 5.678195953369141, -2.4434776306152344, 5.847597122192383, 31.211868286132812, 8.253990173339844, 23.030189514160156, 16.230419158935547, 27.20612335205078, 8.614070892333984, -0.4500885009765625, -13.651939392089844, 10.0902099609375, 2.1419925689697266, 7.455291748046875, 20.282546997070312, 25.87590789794922, -2.356201171875, 5.17950439453125, 21.722854614257812, 12.97869873046875, 4.0310821533203125, 22.4215087890625, -0.6832313537597656, 14.611358642578125, 8.65887451171875, 6.572029113769531, 2.162841796875, 17.632247924804688, 10.528450012207031, 0.0058441162109375, 8.37553596496582, 0.5445346832275391, 7.776445388793945], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000065.npy"}
|
|
{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 8.782968521118164, "std": 10.66642951965332, "min": -10.6619873046875, "p10": -0.6878326416015623, "median": 7.386231422424316, "p90": 20.918873977661136, "max": 50.22401428222656, "pos_frac": 0.859375, "sample": [8.026924133300781, 50.22401428222656, 0.025970458984375, 16.454421997070312, 14.335914611816406, 12.560127258300781, 2.256284713745117, 0.008472442626953125, 10.656633377075195, 7.720550537109375, -9.457611083984375, 2.381610870361328, 7.051912307739258, -0.7736129760742188, 13.118724822998047, 4.084075927734375, 4.917291641235352, 0.20357894897460938, 2.2717208862304688, 21.2655029296875, 8.45098876953125, 10.576913833618164, 0.25384521484375, 11.01373291015625, 18.66610336303711, 2.9068756103515625, 5.245796203613281, -3.596710205078125, -4.666967391967773, -10.6619873046875, 11.172370910644531, 18.368515014648438, 20.231945037841797, 8.052566528320312, 3.761411666870117, 13.218368530273438, 6.540178298950195, -0.38246917724609375, 2.258546829223633, 15.758811950683594, 11.157585144042969, 3.8846511840820312, -0.48767852783203125, 7.984504699707031, 6.420515060424805, -8.486885070800781, 10.802787780761719, 1.5302886962890625, -4.4323272705078125, 15.20843505859375, 4.100685119628906, 21.213272094726562, 28.438980102539062, 23.13013458251953, 5.0108642578125, 18.788360595703125, 2.0980606079101562, 8.76511001586914, 15.680191040039062, 2.4937076568603516, 34.912628173828125, 1.4836349487304688, 33.085418701171875, 14.82568359375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000066.npy"}
|
|
{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 9.069478988647461, "std": 10.762707710266113, "min": -12.9495849609375, "p10": -0.7371688842773436, "median": 6.681648254394531, "p90": 22.67923355102539, "max": 52.007476806640625, "pos_frac": 0.828125, "sample": [23.032859802246094, 4.202781677246094, 22.845703125, 0.5028457641601562, 2.338136672973633, -0.5873870849609375, 18.112815856933594, 5.618009567260742, -7.1560211181640625, 2.2313461303710938, 18.140918731689453, 10.453651428222656, 4.202919006347656, 17.211349487304688, -0.351531982421875, -4.6927642822265625, 8.871246337890625, 10.652130126953125, 1.8982009887695312, -12.9495849609375, 22.34851837158203, 8.303955078125, -0.801361083984375, -0.3792266845703125, 6.679924011230469, 2.4553070068359375, 22.820968627929688, 6.91230583190918, -0.54656982421875, 6.729766845703125, 3.8583831787109375, 16.143108367919922, 7.5053863525390625, 3.269723892211914, 2.960205078125, 10.544052124023438, 15.562925338745117, 6.457498550415039, 1.4185791015625, -5.3638153076171875, 5.869804382324219, 18.42267608642578, 4.9435882568359375, 3.8866519927978516, 28.462539672851562, 15.63201904296875, 6.683372497558594, 19.1190185546875, 14.947883605957031, 14.740997314453125, 12.110527038574219, 5.79876708984375, 12.448480606079102, 35.745086669921875, 6.046440124511719, 1.1362171173095703, 52.007476806640625, 3.2244873046875, -2.8410263061523438, 27.440460205078125, 20.836776733398438, 7.827251434326172, -4.285186767578125, 8.785127639770508], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000067.npy"}
|
|
{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 8.60112190246582, "std": 8.812952041625977, "min": -7.835205078125, "p10": 0.24949703216552768, "median": 6.3222246170043945, "p90": 21.35119323730469, "max": 39.48231506347656, "pos_frac": 0.90625, "sample": [-5.131767272949219, 3.345977783203125, 25.830841064453125, 8.708152770996094, 5.874444961547852, -6.161865234375, 9.472892761230469, 24.12023162841797, 1.2694854736328125, 0.5721225738525391, 14.062973022460938, 16.620025634765625, 18.956161499023438, 12.318363189697266, 10.8966064453125, 0.11122894287109375, 6.711643218994141, 13.858612060546875, 3.2746734619140625, 8.816200256347656, -1.0366439819335938, 4.893232345581055, 5.491498947143555, 2.1299591064453125, 7.746942520141602, 13.068679809570312, 20.617141723632812, 8.906173706054688, 9.083507537841797, 20.144119262695312, 21.665786743164062, 24.222537994384766, 1.7819366455078125, 6.322601318359375, 10.666328430175781, 1.7175407409667969, -0.039661407470703125, 6.321847915649414, 2.431793212890625, 5.547508239746094, 17.77078628540039, 2.9230308532714844, 11.202383041381836, -7.835205078125, 6.515434265136719, 5.872039794921875, 4.040155410766602, 22.72631072998047, 3.851165771484375, 1.4127159118652344, 1.4927120208740234, 39.48231506347656, 26.131553649902344, 17.583541870117188, 13.603324890136719, 0.6424026489257812, 1.7936286926269531, 5.355920791625977, 11.314804077148438, -2.0553512573242188, 5.231422424316406, 2.4235992431640625, 5.326377868652344, 8.456920623779297], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000068.npy"}
|
|
{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 10.238265037536621, "std": 10.117827415466309, "min": -5.6052398681640625, "p10": -0.24796867370605463, "median": 7.218549728393555, "p90": 21.54448623657227, "max": 46.932220458984375, "pos_frac": 0.875, "sample": [5.344085693359375, 7.135280609130859, 32.608642578125, 9.150535583496094, 12.359260559082031, 16.879104614257812, -5.6052398681640625, -1.56787109375, -0.19195938110351562, 4.445465087890625, 5.13836669921875, 9.441593170166016, -0.27197265625, 9.779823303222656, 6.596954345703125, 16.62985610961914, 27.416885375976562, 7.363006591796875, 17.839492797851562, 16.100791931152344, -1.1103477478027344, 27.075286865234375, 14.503028869628906, 10.374454498291016, 19.972030639648438, 14.509307861328125, 4.6210174560546875, 16.846946716308594, 13.798751831054688, 6.004179000854492, 1.0886077880859375, 0.21409225463867188, 20.099632263183594, 46.932220458984375, 4.543704986572266, 20.564720153808594, 6.2743988037109375, 14.893798828125, 18.267333984375, 0.7439613342285156, 1.9747085571289062, 17.510391235351562, 10.581809997558594, 1.8556671142578125, -0.7993621826171875, 15.450042724609375, 6.857364654541016, 37.20600891113281, 5.020111083984375, 8.355924606323242, 6.40532112121582, -4.304046630859375, 6.6768341064453125, 3.2476806640625, 2.1760025024414062, 7.30181884765625, 21.964385986328125, 22.22418212890625, 0.878143310546875, 20.390151977539062, 1.7740821838378906, 1.4573688507080078, 4.957231521606445, -0.7220611572265625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000069.npy"}
|
|
{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 11.006942749023438, "std": 11.691818237304688, "min": -10.111907958984375, "p10": -1.4808921813964842, "median": 10.55581283569336, "p90": 26.975953674316415, "max": 43.357025146484375, "pos_frac": 0.84375, "sample": [6.348209381103516, 1.2432098388671875, 12.140815734863281, 20.793701171875, 23.604095458984375, 13.074228286743164, -8.265869140625, -2.14569091796875, 14.154518127441406, 4.22076416015625, -1.6017990112304688, 13.966011047363281, -2.7002506256103516, 9.216522216796875, 40.48890686035156, 1.810262680053711, 10.150070190429688, 27.889617919921875, 12.382600784301758, 43.357025146484375, 11.969085693359375, 10.961555480957031, 1.5924606323242188, 30.63262939453125, 0.4389190673828125, 17.6571044921875, 20.242507934570312, 13.905145645141602, -1.7132740020751953, 7.287179946899414, 6.251495361328125, 28.148681640625, 2.2610321044921875, 0.6490440368652344, 1.1138992309570312, -0.40966796875, 14.304935455322266, 2.9471511840820312, 11.981452941894531, -1.9901256561279297, 3.5781898498535156, 3.9508399963378906, -1.1987762451171875, 2.658843994140625, 11.409017562866211, 1.7843780517578125, 19.881134033203125, 24.844070434570312, -10.111907958984375, 4.956451416015625, 18.257293701171875, 3.3343276977539062, 1.7174797058105469, 34.79048156738281, 14.095882415771484, -0.2840919494628906, 36.041778564453125, 11.401012420654297, 22.97662353515625, 17.227664947509766, 3.6131134033203125, 24.119216918945312, 14.058303833007812, 22.98480224609375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000070.npy"}
|
|
{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 12.710762977600098, "std": 12.758475303649902, "min": -9.655279159545898, "p10": 0.6763015747070323, "median": 10.556608200073242, "p90": 23.575215148925786, "max": 53.53399658203125, "pos_frac": 0.90625, "sample": [-6.388496398925781, 44.325836181640625, 6.964702606201172, 1.7327423095703125, 6.410831451416016, 13.172550201416016, 6.845329284667969, 13.017646789550781, 18.272785186767578, 5.804574966430664, 22.871658325195312, 7.540063858032227, 19.320281982421875, 4.522489547729492, 3.1979904174804688, 7.473289489746094, 11.518672943115234, 16.447479248046875, -0.545013427734375, 2.3252086639404297, 4.668663024902344, 46.7001953125, 7.913780212402344, 7.306419372558594, 17.295860290527344, 16.10857391357422, 5.945903778076172, 12.82083511352539, 13.72732162475586, -8.887176513671875, 4.87799072265625, 13.1025390625, 11.607589721679688, -0.3330974578857422, 5.330577850341797, 18.18653106689453, 20.996261596679688, 0.223541259765625, 22.623245239257812, 16.394088745117188, 5.580131530761719, 13.592147827148438, 7.003471374511719, 18.937591552734375, 16.537216186523438, 19.0501708984375, 4.378211975097656, 38.42120361328125, 50.831024169921875, 53.53399658203125, 2.5470352172851562, 23.876739501953125, -9.655279159545898, -1.2977466583251953, 14.786083221435547, 6.774105072021484, 7.542919158935547, 11.560009002685547, 19.44260025024414, 16.574356079101562, 32.734375, 3.5829849243164062, 6.120706558227539, 9.59454345703125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000071.npy"}
|
|
{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 13.297285079956055, "std": 13.629204750061035, "min": -14.569595336914062, "p10": -2.169756889343258, "median": 11.335516929626465, "p90": 33.445353698730486, "max": 57.644195556640625, "pos_frac": 0.890625, "sample": [19.34612274169922, 16.07573127746582, 5.855236053466797, 14.246925354003906, 6.158973693847656, 3.8755340576171875, 9.242034912109375, 28.078018188476562, -14.569595336914062, 26.50140380859375, 16.230239868164062, 15.02154541015625, 7.557645797729492, 1.75323486328125, 17.398021697998047, 9.816452026367188, 13.626632690429688, 26.364700317382812, 14.521812438964844, 35.23155212402344, 40.258453369140625, -3.576902389526367, 7.371513366699219, 10.855154037475586, 18.419227600097656, 3.0330352783203125, 19.005218505859375, 10.491199493408203, 11.815879821777344, 17.926162719726562, 2.9008560180664062, 5.244598388671875, 11.845001220703125, -14.412948608398438, 36.387115478515625, 29.277557373046875, 19.676406860351562, 7.6720123291015625, 22.834671020507812, -4.196205139160156, 9.340421676635742, 42.32952880859375, 57.644195556640625, -7.027545928955078, 10.080495834350586, 25.620941162109375, 6.949615478515625, 8.839157104492188, 1.1135826110839844, 36.89862060546875, -10.17474365234375, 4.749691009521484, 2.5587615966796875, 18.475143432617188, 14.140396118164062, 20.603469848632812, 9.949287414550781, 16.367233276367188, 37.17767333984375, 12.468944549560547, 6.714229583740234, 3.4951705932617188, -3.8493423461914062, 1.4011039733886719], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000072.npy"}
|
|
{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 13.359804153442383, "std": 19.24289894104004, "min": -26.8802490234375, "p10": -9.420606040954588, "median": 10.272159576416016, "p90": 34.87802276611328, "max": 69.58973693847656, "pos_frac": 0.78125, "sample": [32.84938049316406, -13.747978210449219, 7.289188385009766, 15.53265380859375, 7.358695983886719, 37.864768981933594, -6.4074554443359375, 26.12334442138672, 8.284965515136719, -0.44803619384765625, 30.13616943359375, 34.167747497558594, 24.430709838867188, -1.0507698059082031, 0.8705596923828125, -15.177398681640625, 60.079833984375, -1.78472900390625, 11.367088317871094, 19.243858337402344, 17.449398040771484, 8.393440246582031, 10.480819702148438, 1.0862674713134766, 2.1738204956054688, 10.063499450683594, -18.84579086303711, 11.409297943115234, 31.405899047851562, 15.923965454101562, 13.768989562988281, 3.0958404541015625, 58.2586669921875, -12.208267211914062, 5.306262969970703, 31.99248504638672, 32.072265625, -10.116716384887695, 4.6091766357421875, 2.4212112426757812, -2.190887451171875, -4.617156982421875, -10.363037109375, 7.7697906494140625, 17.429122924804688, 17.506607055664062, 3.035825729370117, 5.502315521240234, 21.098556518554688, 45.63890075683594, 15.491241455078125, 6.449024200439453, 21.96282958984375, 6.1736907958984375, 15.120193481445312, -7.796348571777344, 15.15545654296875, 57.82749938964844, 27.660720825195312, 69.58973693847656, 3.121917724609375, 35.18242645263672, 19.436126708984375, -26.8802490234375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000073.npy"}
|
|
{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 14.197797775268555, "std": 21.06311798095703, "min": -40.176231384277344, "p10": -3.203385543823241, "median": 10.777204513549805, "p90": 32.791017913818365, "max": 123.98486328125, "pos_frac": 0.8125, "sample": [27.796554565429688, 9.626029968261719, 12.717941284179688, 0.1369800567626953, 25.12835693359375, 7.24053955078125, 10.504179000854492, 56.18274688720703, 2.0233287811279297, 13.616722106933594, 15.061126708984375, 10.500125885009766, 20.611705780029297, 1.6448822021484375, 17.762367248535156, 123.98486328125, 8.958686828613281, 30.7373046875, 16.149211883544922, 3.1545486450195312, 1.352029800415039, 51.43617248535156, 4.202613830566406, 33.671180725097656, 26.583786010742188, 10.786788940429688, -3.812702178955078, 0.3116302490234375, -0.7227611541748047, -19.33391571044922, 16.77985382080078, -1.247772216796875, -0.3755168914794922, 8.76519775390625, -40.176231384277344, 21.668739318847656, -1.9408493041992188, 26.754940032958984, 4.949270248413086, -3.7444725036621094, -10.001991271972656, 11.038703918457031, 20.032135009765625, 26.112747192382812, -0.3472709655761719, 46.53668212890625, 15.078872680664062, 2.9377593994140625, 8.316085815429688, 9.351848602294922, 10.767620086669922, 37.46775817871094, 18.982139587402344, 23.311187744140625, 38.737327575683594, 16.88994598388672, 5.701448440551758, 16.652862548828125, 26.949234008789062, 28.518508911132812, 0.001277923583984375, -3.797943115234375, -4.505237579345703, 14.48117446899414], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000074.npy"}
|
|
{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 20.238035202026367, "std": 19.98708724975586, "min": -11.355880737304688, "p10": -2.2246305465698226, "median": 16.360244750976562, "p90": 48.267746734619145, "max": 75.7332763671875, "pos_frac": 0.859375, "sample": [-0.5232791900634766, 24.666961669921875, 17.344608306884766, 13.939064025878906, 19.728164672851562, 12.428388595581055, 11.357856750488281, 6.736503601074219, 44.97096252441406, 66.4283447265625, 32.3145751953125, 15.220428466796875, -6.8701324462890625, 28.44922637939453, 75.7332763671875, 15.29302978515625, 40.157501220703125, 62.61299133300781, 9.680280685424805, -0.26853179931640625, 43.94642639160156, 24.803985595703125, 15.683113098144531, 11.121879577636719, 17.22564697265625, 47.70104217529297, 48.5106201171875, 3.543792724609375, 8.79958724975586, 31.84072494506836, 19.198898315429688, 17.374435424804688, 4.9640655517578125, 66.67022705078125, -5.189792633056641, -2.9537811279296875, 25.691177368164062, -11.355880737304688, 20.934967041015625, 20.451942443847656, 5.9953155517578125, 13.316802978515625, -8.009849548339844, 8.203590393066406, -7.332401275634766, 28.47412109375, 2.7085819244384766, 12.332565307617188, 38.59326171875, 26.77197265625, 4.051548004150391, -6.28564453125, 9.258914947509766, 7.134574890136719, 16.642494201660156, 5.153358459472656, 9.153305053710938, 22.764251708984375, 16.675521850585938, 18.645782470703125, 38.30175018310547, 56.601654052734375, 61.6414794921875, 16.07799530029297], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000075.npy"}
|
|
{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 13.0550537109375, "std": 15.522875785827637, "min": -40.72001647949219, "p10": -2.642675590515135, "median": 11.908500671386719, "p90": 30.704218673706055, "max": 67.81199645996094, "pos_frac": 0.859375, "sample": [14.116708755493164, 6.823421478271484, 2.4132823944091797, 25.004974365234375, 3.3943252563476562, 18.296142578125, 10.391258239746094, 25.627288818359375, 30.881671905517578, 13.628974914550781, 15.547798156738281, 21.70403289794922, 9.41853141784668, 32.76177215576172, 4.643775939941406, 5.168205261230469, 8.654170989990234, 67.81199645996094, 33.54064178466797, 6.175224304199219, -5.329345703125, 12.349172592163086, 0.09234619140625, 12.422584533691406, -15.177459716796875, 6.818414688110352, 5.722282409667969, 15.295303344726562, 6.6016387939453125, 7.512233734130859, 30.2901611328125, 18.768054962158203, 21.798919677734375, 9.323400497436523, 28.223167419433594, 4.341314315795898, -3.2081985473632812, 26.065277099609375, 3.509702682495117, -9.299430847167969, 7.125888824462891, 1.4480228424072266, -6.454570770263672, -6.582511901855469, 17.333702087402344, 29.960433959960938, 48.63861083984375, 28.277313232421875, 8.689373016357422, 5.8512725830078125, 11.211769104003906, -1.3231220245361328, 16.909324645996094, 16.0391845703125, 16.207839965820312, 11.799766540527344, -0.7352752685546875, 14.314308166503906, 35.48503875732422, 22.83794403076172, 23.10638427734375, 31.961746215820312, 12.017234802246094, -40.72001647949219], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000076.npy"}
|
|
{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 21.032672882080078, "std": 21.576066970825195, "min": -38.318603515625, "p10": 0.27487831115722716, "median": 21.25376319885254, "p90": 54.14229125976563, "max": 75.7021484375, "pos_frac": 0.90625, "sample": [16.707725524902344, 44.08313751220703, 30.422374725341797, 40.50310134887695, 42.40025329589844, 0.018543243408203125, 60.5296630859375, 21.76755142211914, 9.05514144897461, 10.780628204345703, 6.121145248413086, 33.88383483886719, 38.02116394042969, 6.341548919677734, 5.48211669921875, 24.257537841796875, -38.318603515625, 20.927391052246094, 34.697349548339844, 32.45111083984375, 13.960329055786133, -2.641643524169922, 9.847160339355469, 0.8729934692382812, 2.6449813842773438, 52.96697998046875, 2.849496841430664, 25.77545928955078, 11.363990783691406, -5.327451705932617, 32.4516487121582, 28.440109252929688, 13.593725204467773, 63.42774963378906, 33.53968048095703, 10.045686721801758, 56.184112548828125, 75.7021484375, 20.51067352294922, 11.814491271972656, -30.460357666015625, 7.825172424316406, -12.5245361328125, 3.579030990600586, 12.319158554077148, 26.97894287109375, 32.34975814819336, 25.60822105407715, 26.594879150390625, 25.942481994628906, 7.614841461181641, -4.9944305419921875, 4.948877334594727, 25.38867950439453, 28.593101501464844, 23.128135681152344, 3.8115158081054688, 55.45977783203125, 67.45161437988281, 11.564571380615234, 23.25457000732422, 54.64599609375, 21.580135345458984, 7.2760009765625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000077.npy"}
|
|
{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 16.198915481567383, "std": 18.161052703857422, "min": -19.961151123046875, "p10": -1.9990493774414062, "median": 16.240592002868652, "p90": 41.883554840087896, "max": 78.49606323242188, "pos_frac": 0.828125, "sample": [4.0842437744140625, 1.8294715881347656, 40.619659423828125, 16.33578109741211, -2.0582199096679688, 45.56382751464844, 16.685382843017578, 78.49606323242188, 26.761688232421875, 21.713253021240234, 21.403079986572266, 8.281993865966797, -4.909793853759766, 14.623220443725586, 22.34589385986328, 20.770694732666016, -0.7244873046875, 9.212350845336914, -7.068367004394531, 2.2581024169921875, 6.515523910522461, 17.486143112182617, 26.30556869506836, 21.44043731689453, 19.21506118774414, 14.993152618408203, 17.917362213134766, 16.458740234375, 21.2130126953125, 5.717433929443359, 42.42522430419922, 16.710973739624023, 5.770851135253906, -9.682422637939453, 25.36526107788086, -9.907958984375, 51.7220458984375, 21.409934997558594, 43.76771545410156, 0.22602081298828125, 36.98698806762695, 3.9124374389648438, 49.863189697265625, 32.71213150024414, 28.523605346679688, 0.39659881591796875, 11.109695434570312, 3.6831283569335938, 12.949073791503906, 7.762180328369141, 6.288120269775391, -1.0949020385742188, 16.145402908325195, -1.8609848022460938, 30.643753051757812, -19.961151123046875, 51.10589599609375, 4.855705261230469, -16.922698974609375, 27.137351989746094, -0.602691650390625, 34.05772399902344, 1.7850112915039062, 25.961105346679688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000078.npy"}
|
|
{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 17.921157836914062, "std": 22.432247161865234, "min": -20.076370239257812, "p10": -5.45579662322998, "median": 13.274473190307617, "p90": 50.07471771240237, "max": 74.4649658203125, "pos_frac": 0.75, "sample": [-5.584251403808594, 52.95470428466797, 6.481439590454102, 69.02546691894531, 5.612113952636719, -0.07262802124023438, -2.334867477416992, -10.081962585449219, 43.7044677734375, 38.16859436035156, 19.17858123779297, -20.076370239257812, 24.779312133789062, 68.47981262207031, 9.968589782714844, 24.776634216308594, 73.18966674804688, 52.60670471191406, 17.705509185791016, -8.575653076171875, 74.4649658203125, 17.4771728515625, -3.2143096923828125, -11.14288330078125, 28.056480407714844, 1.175384521484375, 1.0134811401367188, 16.282405853271484, 13.873416900634766, 60.48094940185547, 3.5671157836914062, 8.943140029907227, 12.675529479980469, 27.614166259765625, -10.212158203125, 10.802253723144531, -1.0563392639160156, 6.999839782714844, 41.48347473144531, 39.30210876464844, 19.790119171142578, 33.021034240722656, 33.91993713378906, 19.533309936523438, 26.98516082763672, -4.837888717651367, 2.8157501220703125, 30.42290496826172, 35.49913024902344, 11.252471923828125, 15.830421447753906, -5.156068801879883, -1.4497146606445312, 38.90318298339844, -1.110870361328125, 21.48028564453125, 5.806644439697266, 6.450281143188477, 0.10730743408203125, -7.975440979003906, 7.842470169067383, 44.166748046875, -3.894573211669922, 19.059425354003906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000079.npy"}
|
|
{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 16.27719497680664, "std": 22.810808181762695, "min": -20.65270233154297, "p10": -6.275245666503906, "median": 11.925914764404297, "p90": 41.70851974487305, "max": 89.83572387695312, "pos_frac": 0.78125, "sample": [27.08300018310547, 8.73978042602539, -6.3813323974609375, 23.06810760498047, 6.261178970336914, 3.16229248046875, -3.0814857482910156, -7.12591552734375, 28.397476196289062, 4.779411315917969, 3.892946243286133, 16.008647918701172, -15.006553649902344, -20.65270233154297, -6.0277099609375, 23.3651123046875, 89.83572387695312, 37.166419982910156, 82.51824951171875, 46.69574737548828, 23.570343017578125, 6.3539276123046875, 41.88934326171875, 41.286598205566406, 34.68836975097656, 15.014162063598633, -1.3204574584960938, 31.539779663085938, 14.723630905151367, 11.9439697265625, 2.3736190795898438, -15.513938903808594, 5.599048614501953, -5.759559631347656, 24.493667602539062, -19.712669372558594, 58.16070556640625, 30.64600372314453, -7.0482025146484375, 83.52825927734375, 10.30361557006836, 3.605621337890625, 2.3960914611816406, 1.0030136108398438, 44.154869079589844, 11.907859802246094, 23.44194793701172, 28.021621704101562, 15.761396408081055, 11.701400756835938, -3.518840789794922, -3.0134620666503906, 0.22230911254882812, 5.366142272949219, 23.352495193481445, 22.123428344726562, 5.660480499267578, 2.7981796264648438, 32.596885681152344, 25.027206420898438, 14.446479797363281, 36.18559265136719, -5.262977600097656, 14.3040771484375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000080.npy"}
|
|
{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 22.524700164794922, "std": 26.601694107055664, "min": -26.156982421875, "p10": -8.62433395385742, "median": 18.55802345275879, "p90": 54.99687194824219, "max": 111.045166015625, "pos_frac": 0.859375, "sample": [41.75617218017578, 31.23590087890625, -26.156982421875, 39.3055419921875, 24.63840103149414, 57.43571472167969, 18.58835220336914, -11.881168365478516, 7.480255126953125, 31.708038330078125, 29.79986572265625, -18.267608642578125, 2.0251617431640625, 24.827285766601562, 83.0263671875, 9.988643646240234, 21.105453491210938, 34.128021240234375, 54.293731689453125, -13.745590209960938, 1.1574268341064453, 9.370269775390625, 20.678436279296875, 70.18470764160156, 24.442161560058594, 11.84554672241211, 15.832244873046875, 7.526943206787109, 6.552013397216797, -6.775367736816406, 24.6859130859375, 9.460296630859375, 5.305599212646484, 12.550392150878906, 29.45672607421875, 111.045166015625, 41.60243606567383, 18.527694702148438, 3.4947967529296875, 55.2982177734375, 40.368072509765625, 46.403411865234375, 14.225105285644531, 0.0871124267578125, 5.034191131591797, -0.7257423400878906, -17.04568862915039, 14.503171920776367, 16.458900451660156, 27.610427856445312, 15.528888702392578, 12.42567253112793, 21.571434020996094, -12.94512939453125, 38.68141174316406, 44.49535369873047, 106.5350341796875, 59.00056457519531, 8.726570129394531, 34.48933410644531, 3.4759521484375, 26.97686767578125, -9.416748046875, 31.58343505859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000081.npy"}
|
|
{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 18.03838348388672, "std": 28.69208335876465, "min": -54.23176574707031, "p10": -14.772960662841795, "median": 16.487096786499023, "p90": 49.49603958129883, "max": 115.63088989257812, "pos_frac": 0.78125, "sample": [-6.089138031005859, 30.95423126220703, -17.086685180664062, 18.356414794921875, 20.295848846435547, 18.385976791381836, -23.80866241455078, 15.800765991210938, 24.271759033203125, 57.809059143066406, 66.70933532714844, 13.404914855957031, -12.671150207519531, 8.227142333984375, 14.863485336303711, 102.50971984863281, 12.320137023925781, -51.77776336669922, -8.835647583007812, 32.2371826171875, 1.4953384399414062, 33.130584716796875, 49.41071319580078, 3.9815311431884766, -17.030231475830078, -6.0632476806640625, 4.233528137207031, 22.94781494140625, 20.388290405273438, 12.532913208007812, 20.159156799316406, 35.301666259765625, 34.91718292236328, -0.8779850006103516, 70.13285827636719, 18.667125701904297, 49.53260803222656, 16.261764526367188, 9.559383392333984, 4.778297424316406, -0.8476638793945312, 28.1268310546875, 58.37446594238281, -16.519134521484375, 29.06871795654297, 21.47040557861328, 6.715524673461914, 45.94145584106445, 32.864036560058594, -15.673736572265625, 8.532234191894531, 6.630889892578125, -54.23176574707031, 7.411903381347656, 32.231239318847656, 21.874998092651367, 6.702552795410156, 25.475982666015625, 16.71242904663086, 115.63088989257812, 15.102096557617188, 18.23456382751465, -0.37441253662109375, 45.66584014892578], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000082.npy"}
|
|
{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 20.966205596923828, "std": 25.152734756469727, "min": -25.602371215820312, "p10": -8.00887222290039, "median": 17.52762222290039, "p90": 60.10125427246096, "max": 82.35887145996094, "pos_frac": 0.8125, "sample": [19.605384826660156, 23.762866973876953, 17.27145004272461, 68.90556335449219, 43.100990295410156, 12.375892639160156, -6.280693054199219, 63.196807861328125, 52.839691162109375, 6.179679870605469, 10.089889526367188, 17.52772331237793, 2.4492645263671875, -3.8796920776367188, 17.823928833007812, 67.7423095703125, -14.859573364257812, 82.35887145996094, 9.944770812988281, 71.4496841430664, 38.42628479003906, 11.502265930175781, 51.20323181152344, -19.361167907714844, -25.602371215820312, -8.6878662109375, 52.8782958984375, -18.131378173828125, 17.52752113342285, 64.29226684570312, 13.412284851074219, 14.221145629882812, 4.51304817199707, 2.8073043823242188, 24.15532684326172, 11.346549987792969, 27.222644805908203, -0.6171722412109375, 37.59437561035156, 11.733806610107422, 19.4954833984375, 18.237041473388672, 42.30140686035156, 69.47393798828125, 19.28487777709961, 45.99256896972656, 5.737789154052734, -17.769393920898438, -2.4347801208496094, 2.1075973510742188, -18.218338012695312, 47.36114501953125, 9.251266479492188, 41.51499938964844, 24.430030822753906, 23.0224609375, -6.424552917480469, 7.726604461669922, 17.932815551757812, 32.085296630859375, 41.29412841796875, 16.305702209472656, 27.789520263671875, 3.2983169555664062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000083.npy"}
|
|
{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 18.37204360961914, "std": 19.07907485961914, "min": -40.14202880859375, "p10": -0.9812202453613278, "median": 16.076417922973633, "p90": 41.002060699462895, "max": 79.00364685058594, "pos_frac": 0.859375, "sample": [8.216728210449219, 14.836135864257812, 3.6687240600585938, 4.773033142089844, 42.29486083984375, 4.5234222412109375, 4.8453369140625, 20.627090454101562, 15.348501205444336, 10.152915954589844, 28.62386703491211, 33.415931701660156, -18.155059814453125, 15.262191772460938, 21.944595336914062, 30.558181762695312, 10.669992446899414, 79.00364685058594, 29.605060577392578, 2.685211181640625, 7.262245178222656, 27.890914916992188, 9.541328430175781, 6.884775161743164, -0.6840705871582031, 40.614158630371094, 24.803871154785156, 31.837310791015625, -40.14202880859375, -1.344573974609375, 50.976051330566406, 2.84722900390625, 36.76226806640625, 15.986682891845703, 17.367141723632812, 7.1690521240234375, 46.277618408203125, 40.44435119628906, 29.905502319335938, 21.911277770996094, 4.175323486328125, 30.197067260742188, -3.6513233184814453, 16.166152954101562, 27.668975830078125, 28.187213897705078, -1.1085700988769531, -2.924060821533203, 31.999610900878906, 31.650291442871094, 0.0243377685546875, 41.168304443359375, 50.72425842285156, 20.284378051757812, 54.47412109375, 14.914199829101562, 29.236083984375, 6.5634918212890625, -13.612586975097656, 29.980857849121094, 12.192081451416016, 18.72063446044922, 9.77731704711914, -0.2088336944580078], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000084.npy"}
|
|
{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 18.73013687133789, "std": 22.948348999023438, "min": -28.10533905029297, "p10": -4.007674026489258, "median": 16.88616371154785, "p90": 52.84176635742188, "max": 67.08082580566406, "pos_frac": 0.71875, "sample": [41.021705627441406, 12.216903686523438, 29.007888793945312, 3.2875900268554688, 0.79742431640625, 21.905258178710938, 24.542015075683594, 2.5162086486816406, 58.191627502441406, 7.827667236328125, -2.493844985961914, 5.4257354736328125, 60.310791015625, 23.36560821533203, 22.97541618347168, 20.157604217529297, -4.233856201171875, -16.46520233154297, 18.724952697753906, -0.6405715942382812, -28.10533905029297, 65.90936279296875, 17.131942749023438, -1.5463790893554688, -2.1357421875, 43.25414276123047, 8.494895935058594, -5.66119384765625, 16.95050811767578, 10.94366455078125, 45.58927917480469, -2.957061767578125, -13.785934448242188, -1.0793304443359375, 13.394271850585938, 4.975254058837891, 62.75025939941406, 16.821819305419922, 36.277748107910156, -0.0020771026611328125, -3.4799156188964844, 67.08082580566406, -1.8217582702636719, 42.414459228515625, 53.6143798828125, 14.671226501464844, 50.7911376953125, 61.15898132324219, 9.429412841796875, 5.055078506469727, 29.50287628173828, 48.084571838378906, -0.4778900146484375, -6.117027282714844, 18.123153686523438, 18.107999801635742, 51.03900146484375, 19.606399536132812, 29.679290771484375, 35.864871978759766, -18.9080810546875, 18.894275665283203, -0.7152976989746094, 41.469825744628906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000085.npy"}
|
|
{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 19.856426239013672, "std": 33.968658447265625, "min": -48.26544189453125, "p10": -13.915065002441404, "median": 14.003738403320312, "p90": 61.24119415283206, "max": 129.28927612304688, "pos_frac": 0.734375, "sample": [-11.167381286621094, 16.992942810058594, 65.1341552734375, -17.125938415527344, 34.23277282714844, 27.60382843017578, 25.996828079223633, -2.774383544921875, 93.3790283203125, 11.014533996582031, 5.871789932250977, -3.3300018310546875, 1.5458450317382812, 20.363204956054688, -7.717193603515625, 19.269203186035156, 45.294219970703125, 19.397415161132812, -1.9934501647949219, -42.883636474609375, 54.223602294921875, 19.249500274658203, 30.536998748779297, 10.061857223510742, 9.784904479980469, -15.365318298339844, -1.9680709838867188, 64.24873352050781, -4.990146636962891, 6.825674057006836, 36.40024948120117, 18.75798797607422, 7.25599479675293, -48.26544189453125, 42.31089782714844, 50.9049072265625, 25.718215942382812, -33.160011291503906, 66.98628234863281, 3.6553287506103516, 129.28927612304688, 18.503684997558594, 25.304824829101562, 10.411540985107422, -3.1628379821777344, 118.65713500976562, 7.100452423095703, 112.43634033203125, 44.19989013671875, -2.176422119140625, -27.79290008544922, 38.61103057861328, 9.607370376586914, 32.759490966796875, 27.303688049316406, 35.091609954833984, 36.241729736328125, 4.915018081665039, 3.3871498107910156, 4.88298225402832, -3.677745819091797, -15.092643737792969, 2.9709930419921875, 18.763675689697266], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000086.npy"}
|
|
{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 22.8221492767334, "std": 30.144874572753906, "min": -63.070404052734375, "p10": -4.764495086669921, "median": 19.265869140625, "p90": 60.51042938232423, "max": 121.85108947753906, "pos_frac": 0.8125, "sample": [21.57761573791504, 32.790306091308594, 0.7971477508544922, 9.48135757446289, 33.238983154296875, -4.348480224609375, 27.12989044189453, -0.615753173828125, 0.48926544189453125, -1.3361892700195312, -18.342269897460938, 40.27250671386719, -4.942787170410156, 61.78962707519531, 27.805679321289062, 51.03535461425781, 50.482017517089844, 9.787878036499023, 86.60543823242188, 51.54986572265625, 19.2021484375, 18.32187271118164, 7.082862854003906, 38.21269989013672, 35.766761779785156, 2.0306358337402344, 34.9913330078125, 121.85108947753906, 14.417633056640625, 2.2576904296875, 14.150758743286133, 68.90838623046875, 56.711944580078125, 8.39419937133789, 53.53950500488281, 24.16830825805664, 14.111330032348633, 36.4383430480957, 19.32958984375, 63.56849670410156, 0.5114707946777344, 74.77198791503906, 16.294227600097656, 20.740234375, -63.070404052734375, 9.925622940063477, 37.68970489501953, -32.98719024658203, 36.00599670410156, 65.89602661132812, -12.713764190673828, -13.173942565917969, 7.938543319702148, 18.94080352783203, -3.54669189453125, 57.525634765625, 29.84185791015625, 31.947769165039062, -33.43647766113281, 21.738990783691406, 15.2886962890625, 1.3313732147216797, 47.650787353515625, -3.196697235107422], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000087.npy"}
|
|
{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 17.228668212890625, "std": 28.29409408569336, "min": -47.966033935546875, "p10": -5.552677917480468, "median": 12.398456573486328, "p90": 53.05799407958987, "max": 114.99273681640625, "pos_frac": 0.78125, "sample": [10.790267944335938, 11.997390747070312, 61.301544189453125, 55.63447570800781, 32.087921142578125, 43.13951110839844, 2.999774932861328, 59.85838317871094, -4.421875, 12.757957458496094, 30.254440307617188, 5.994415283203125, 3.6474838256835938, 43.294105529785156, -19.120223999023438, 27.200958251953125, -18.111557006835938, 15.934524536132812, -42.59019470214844, -47.966033935546875, -3.7261219024658203, 67.71713256835938, 34.9940185546875, -2.6045150756835938, 40.31800079345703, 4.941507339477539, 25.076705932617188, -6.0373077392578125, 20.89912986755371, 21.391441345214844, 37.39067077636719, 7.391937255859375, -1.2218036651611328, 25.668960571289062, 1.12786865234375, 83.489013671875, 14.467811584472656, 47.04620361328125, 4.350334167480469, 114.99273681640625, 4.862102508544922, 38.77784729003906, 5.004812240600586, 13.761125564575195, 12.038955688476562, 21.531845092773438, 1.7985553741455078, -1.4205474853515625, 30.160079956054688, 69.94305419921875, 30.180885314941406, 13.9085693359375, 0.8376655578613281, 8.837799072265625, -44.95494079589844, -2.2264556884765625, 9.811182022094727, -1.5337162017822266, 10.210805892944336, 14.543542861938477, 18.493810653686523, -20.324234008789062, 8.39776611328125, 37.63722229003906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000088.npy"}
|
|
{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 18.97100257873535, "std": 23.95474624633789, "min": -50.6495361328125, "p10": -1.8304300308227537, "median": 16.06578254699707, "p90": 43.49020004272462, "max": 110.27760314941406, "pos_frac": 0.875, "sample": [4.3946075439453125, 1.6139678955078125, 4.7515411376953125, 20.188833236694336, -8.159931182861328, 44.119659423828125, 35.198890686035156, 15.829269409179688, -6.1368865966796875, 53.454322814941406, 52.56993865966797, 22.03888702392578, 19.65387725830078, 40.70332336425781, 32.308189392089844, 29.248409271240234, 12.731536865234375, 9.197685241699219, -2.193143844604492, 17.667556762695312, 13.602516174316406, 2.894227981567383, 15.785120010375977, 10.262752532958984, -7.0679931640625, 2.1891613006591797, 0.8377151489257812, 9.096378326416016, 63.607269287109375, 17.84765625, 21.3897705078125, 77.7581787109375, 16.302295684814453, 42.021461486816406, 20.29300308227539, 2.9505157470703125, 13.644645690917969, 110.27760314941406, 3.1419830322265625, -1.6596317291259766, 28.204463958740234, 1.99383544921875, 19.66465187072754, 27.644580841064453, 15.516958236694336, 5.5949249267578125, 9.628515243530273, 5.725643157958984, 3.275421142578125, 32.24127197265625, 19.39080047607422, 35.97538757324219, -1.9036293029785156, 14.160446166992188, 38.74042510986328, -50.6495361328125, -19.150360107421875, 25.320274353027344, 16.592302322387695, 18.086246490478516, 22.872772216796875, 22.52892303466797, 82.28517150878906, 6.049488067626953], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000089.npy"}
|
|
{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 24.963214874267578, "std": 35.91521072387695, "min": -41.79258728027344, "p10": -7.496365547180172, "median": 16.733497619628906, "p90": 79.38119506835938, "max": 127.31390380859375, "pos_frac": 0.828125, "sample": [58.97645568847656, -37.83013916015625, 29.007675170898438, 79.29083251953125, 21.488300323486328, 33.35383605957031, 0.41057586669921875, -3.2233734130859375, 50.950897216796875, -1.2081317901611328, 40.77129364013672, 16.696990966796875, 0.5797691345214844, 11.864639282226562, 33.658203125, 79.419921875, -4.006839752197266, 127.31390380859375, 45.03675842285156, 2.8737030029296875, 2.8873233795166016, 7.78570556640625, 16.770004272460938, -8.976272583007812, 101.48541259765625, 16.18425750732422, 24.20172119140625, 106.37908935546875, 4.660869598388672, 22.504009246826172, 120.59576416015625, 19.81409454345703, -19.766632080078125, 9.500526428222656, 1.0649337768554688, -41.79258728027344, 25.083988189697266, 6.883964538574219, 55.514007568359375, 5.188728332519531, 12.287239074707031, 11.35235595703125, -19.064208984375, -4.043249130249023, 17.57415771484375, 72.87530517578125, 86.88055419921875, -32.807029724121094, -23.602157592773438, 10.117103576660156, 30.299041748046875, 1.7723312377929688, 17.747779846191406, 30.13257598876953, 57.808319091796875, 29.883201599121094, 10.210783004760742, 8.467432022094727, 44.418426513671875, 13.40951156616211, 20.392913818359375, 14.34803581237793, 87.9481201171875, 37.843040466308594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000090.npy"}
|
|
{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 21.788610458374023, "std": 29.779512405395508, "min": -27.548690795898438, "p10": -14.11779098510742, "median": 19.63234519958496, "p90": 61.517736816406284, "max": 100.45794677734375, "pos_frac": 0.75, "sample": [17.28974151611328, 48.643524169921875, -26.918670654296875, 6.941616058349609, 64.80714416503906, 0.5307712554931641, -11.928535461425781, 40.82822799682617, 5.148210525512695, 18.702529907226562, 39.27128601074219, 35.52091979980469, 37.72063446044922, 12.8812255859375, -27.548690795898438, 99.91560363769531, -10.562370300292969, 2.226593017578125, 7.00933837890625, 75.14151000976562, 10.431365966796875, 30.508914947509766, 100.45794677734375, -12.898735046386719, 26.90003204345703, 51.84874725341797, -14.827362060546875, 1.8811397552490234, 20.163047790527344, -4.10162353515625, -12.26507568359375, 20.301288604736328, 52.56309509277344, -16.29095458984375, 19.19355010986328, 19.23625946044922, 50.813133239746094, -10.431114196777344, 20.028430938720703, -3.706247329711914, 8.676315307617188, 30.045501708984375, 53.84245300292969, 3.3514862060546875, -14.640243530273438, 34.169830322265625, 22.79217529296875, 71.60151672363281, 27.688522338867188, 35.616233825683594, 13.997459411621094, -21.534889221191406, 1.043802261352539, 21.537216186523438, -2.382068634033203, 41.24898147583008, -0.2072010040283203, 76.31045532226562, 40.70295715332031, 21.535457611083984, 47.12847900390625, -19.179039001464844, 40.983062744140625, 74.71614837646484], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000091.npy"}
|
|
{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 21.574983596801758, "std": 28.9274845123291, "min": -40.30064392089844, "p10": -8.86779098510742, "median": 18.258459091186523, "p90": 50.840399169921874, "max": 108.21713256835938, "pos_frac": 0.734375, "sample": [-2.8746166229248047, 4.999603271484375, 8.600799560546875, 41.59333801269531, -8.299549102783203, 6.049736022949219, -40.30064392089844, -13.729804992675781, 40.375022888183594, 22.175983428955078, 59.11454391479492, 0.9421138763427734, 38.191619873046875, 2.581239700317383, -1.4929275512695312, 97.40286254882812, 50.68486022949219, 108.21713256835938, -28.132232666015625, -5.944976806640625, 22.97049331665039, -9.088027954101562, 26.8963623046875, -6.2904815673828125, -13.424827575683594, 49.090667724609375, -9.579261779785156, 64.035400390625, 28.89667510986328, 6.17552375793457, 68.23258209228516, -1.8711395263671875, 31.879966735839844, 1.4700031280517578, 48.173675537109375, 6.565460205078125, 26.500411987304688, 89.72834777832031, 9.640876770019531, 49.51429748535156, 40.34191131591797, 50.90705871582031, -1.47991943359375, -2.21124267578125, 7.082042694091797, 21.614757537841797, 2.5305023193359375, 35.082763671875, 33.20527648925781, 10.299140930175781, -0.5111770629882812, -11.176111221313477, 50.18781661987305, 14.077167510986328, 16.983829498291016, 19.53308868408203, 34.590179443359375, 41.84162139892578, 40.30030822753906, 22.14788055419922, 11.618169784545898, -8.353904724121094, 50.49946594238281, 32.01727294921875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000092.npy"}
|
|
{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 21.845975875854492, "std": 25.868497848510742, "min": -31.827964782714844, "p10": -2.899954223632811, "median": 14.26284408569336, "p90": 53.61547164916992, "max": 132.00674438476562, "pos_frac": 0.84375, "sample": [21.252578735351562, 8.791065216064453, 24.161468505859375, 58.10327911376953, 53.6312255859375, 46.49009704589844, 6.1175689697265625, 20.312416076660156, 39.20610809326172, -9.773872375488281, 64.19100952148438, 14.128807067871094, 46.069358825683594, 46.93614959716797, 132.00674438476562, 10.957172393798828, 6.443870544433594, 5.6981658935546875, 5.8785858154296875, 80.32886505126953, -6.7549896240234375, 12.850566864013672, 19.958415985107422, -0.458282470703125, 28.16680908203125, 11.86517333984375, 26.15882110595703, 6.116477966308594, -1.7456741333007812, 12.720428466796875, -0.47754859924316406, 14.396881103515625, 8.481595993041992, 20.37792205810547, 32.136497497558594, 46.19615173339844, 57.949180603027344, -16.49994659423828, -4.162517547607422, 42.48028564453125, 39.36670684814453, 33.10660171508789, 27.06678009033203, 5.439489364624023, 9.76317024230957, 7.688636779785156, 25.900375366210938, 2.624227523803711, 53.578712463378906, 3.188251495361328, -3.3946456909179688, 53.6317138671875, 9.951950073242188, 13.821586608886719, 5.575185775756836, 4.9019927978515625, -15.276527404785156, 32.00542449951172, 42.19309997558594, 32.805511474609375, 39.55848693847656, 14.787910461425781, -31.827964782714844, 0.9988613128662109], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000093.npy"}
|
|
{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 24.53407859802246, "std": 26.643558502197266, "min": -16.171401977539062, "p10": -5.970870399475097, "median": 22.523531913757324, "p90": 59.76775588989259, "max": 97.04696655273438, "pos_frac": 0.78125, "sample": [23.544353485107422, -13.211280822753906, 40.31446838378906, 17.099559783935547, 6.2540435791015625, -10.511981964111328, 8.255485534667969, 54.93668746948242, 5.358242034912109, -14.90325927734375, 22.913400650024414, 13.350173950195312, 22.211505889892578, 4.677482604980469, 10.229072570800781, 0.1949939727783203, 52.52227783203125, 11.85280990600586, -6.309558868408203, 97.04696655273438, 6.183725357055664, 23.29360580444336, 28.14533233642578, 48.998863220214844, 69.547119140625, 24.566497802734375, 13.303146362304688, 26.417449951171875, 19.02267837524414, -1.5625457763671875, -2.3745784759521484, 16.254491806030273, 32.36219787597656, 50.324951171875, 41.13669967651367, 17.523723602294922, 31.966018676757812, 60.97222137451172, 32.53083801269531, 22.83555793762207, 4.667091369628906, 41.89929962158203, -0.6336212158203125, -12.042823791503906, 61.78938293457031, 67.91090393066406, -5.180597305297852, 56.15449523925781, 90.11956787109375, -4.850517272949219, -16.171401977539062, 80.857177734375, 25.362220764160156, 48.94618225097656, -2.4930648803710938, 15.834564208984375, -11.24853515625, -2.1839370727539062, 34.299278259277344, 11.106979370117188, 32.845699310302734, 48.72393798828125, 56.95733642578125, 40.238006591796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000094.npy"}
|
|
{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 19.916799545288086, "std": 24.45707130432129, "min": -57.01957702636719, "p10": -4.8144136428832995, "median": 16.057157516479492, "p90": 52.62860565185547, "max": 76.4697265625, "pos_frac": 0.828125, "sample": [11.546798706054688, 36.81793975830078, 9.896797180175781, 21.230087280273438, 18.268545150756836, 57.21478271484375, 9.875728607177734, 6.052942276000977, -5.2169189453125, 16.085906982421875, 18.71625518798828, 3.7782363891601562, 30.287128448486328, 71.17915344238281, 47.25303649902344, -0.08524322509765625, 32.83164978027344, 4.156492233276367, 12.777597427368164, 37.820068359375, 16.508750915527344, 32.87178039550781, 18.582048416137695, 13.772809982299805, 9.432762145996094, -8.144515991210938, 8.905380249023438, 43.48216247558594, 18.89157485961914, 26.535079956054688, -6.138313293457031, -5.3414459228515625, 49.499267578125, 76.4697265625, 70.54524230957031, 50.82350158691406, 29.222885131835938, 2.2906723022460938, 1.258697509765625, 53.4022216796875, 40.5097541809082, 68.0245361328125, 64.54142761230469, 4.066322326660156, 14.642318725585938, 9.324148178100586, -0.6769733428955078, 28.97752571105957, 20.54712677001953, -57.01957702636719, 1.7093620300292969, 2.1207637786865234, -20.319488525390625, 36.20045471191406, 39.48859405517578, 13.143989562988281, 36.83685302734375, 11.447868347167969, 16.02840805053711, 16.21783447265625, -3.875234603881836, -0.9093914031982422, 15.444282531738281, -25.152976989746094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000095.npy"}
|
|
{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 26.69620132446289, "std": 30.379751205444336, "min": -45.75994873046875, "p10": 0.16478519439697276, "median": 19.8350887298584, "p90": 71.19367752075199, "max": 135.6867218017578, "pos_frac": 0.921875, "sample": [50.60978698730469, 19.77220916748047, 31.119171142578125, 25.714427947998047, 24.123001098632812, 54.938079833984375, 7.4791717529296875, 2.3222694396972656, 0.12894630432128906, 26.520767211914062, 98.74900817871094, 12.954544067382812, 12.84832763671875, -1.6911811828613281, 0.24840927124023438, 19.897968292236328, 27.67737579345703, 40.91676330566406, 79.86908721923828, 77.21176147460938, 8.853866577148438, 38.72825622558594, 31.76512908935547, 135.6867218017578, 62.510643005371094, 14.76174545288086, 21.048908233642578, 14.701858520507812, 16.618896484375, 15.812122344970703, 7.206336975097656, 110.73391723632812, 30.37233543395996, 25.495803833007812, 4.5795440673828125, 74.91497802734375, 46.67988586425781, 5.75, 32.77255630493164, 2.269266128540039, 41.568267822265625, 26.557729721069336, 17.47222137451172, -5.594047546386719, 13.06130599975586, -0.44844627380371094, 3.52117919921875, 33.960533142089844, 0.7968940734863281, 19.898452758789062, 11.069625854492188, 48.00236511230469, -12.133527755737305, 2.7458343505859375, 52.21281433105469, 9.206626892089844, 15.498373031616211, 49.42155456542969, 76.649169921875, 7.902076721191406, 0.023443222045898438, 26.98925018310547, -45.75994873046875, 3.2624664306640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000096.npy"}
|
|
{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 21.052751541137695, "std": 23.609622955322266, "min": -47.82250213623047, "p10": -5.667726516723631, "median": 19.64161491394043, "p90": 49.05971069335938, "max": 92.98004150390625, "pos_frac": 0.828125, "sample": [15.395469665527344, 46.575103759765625, 11.3984375, 4.484336853027344, -3.528759002685547, 1.1295089721679688, 15.689437866210938, 25.510772705078125, 1.8150672912597656, 39.265869140625, 20.097400665283203, 35.708290100097656, -0.6206855773925781, 17.974071502685547, 71.15509033203125, 49.405975341796875, -14.906318664550781, 23.445175170898438, 1.0614395141601562, 11.940483093261719, 24.136436462402344, 6.435199737548828, 14.197345733642578, -15.302490234375, 59.809722900390625, 19.048627853393555, 24.031219482421875, 27.611434936523438, 38.55012512207031, 13.690982818603516, 24.19562530517578, -3.35009765625, 14.211532592773438, -8.440095901489258, 39.956443786621094, 32.33868408203125, 47.50971221923828, 26.343334197998047, -47.82250213623047, -6.90416145324707, 43.21405029296875, 31.212398529052734, -6.5844268798828125, 6.1817626953125, 48.89964294433594, 29.658222198486328, 1.4473800659179688, 19.185829162597656, 20.194440841674805, 15.047920227050781, -3.0194034576416016, 49.12831115722656, -8.424297332763672, 46.4989128112793, 3.3616676330566406, 53.95375061035156, 0.729339599609375, 44.196868896484375, 43.280860900878906, 56.010467529296875, 30.391681671142578, 5.1719818115234375, 92.98004150390625, 21.41546630859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000097.npy"}
|
|
{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 23.86449432373047, "std": 30.320737838745117, "min": -79.47860717773438, "p10": -9.727111816406248, "median": 24.250701904296875, "p90": 59.75626831054688, "max": 101.19677734375, "pos_frac": 0.828125, "sample": [30.598793029785156, 6.425182342529297, 62.39451599121094, 35.173797607421875, 5.628822326660156, 18.799779891967773, 18.95025634765625, 23.63134765625, 24.129074096679688, 5.9943389892578125, 26.616884231567383, 2.260955810546875, 19.414382934570312, 18.70563507080078, 59.48097229003906, 46.15887451171875, -10.773296356201172, 6.9759979248046875, 1.4556083679199219, -18.047691345214844, 38.13734817504883, 32.74681854248047, -20.809120178222656, -23.92279052734375, 52.734031677246094, 47.41921615600586, 30.465438842773438, 60.28136444091797, 59.87425231933594, 24.372329711914062, 32.22119140625, -1.2045745849609375, 54.701271057128906, 101.19677734375, -79.47860717773438, 71.47044372558594, 53.08258056640625, 25.383453369140625, 39.17347717285156, 13.168472290039062, -7.286014556884766, 36.14525604248047, -35.3697509765625, 21.686203002929688, 29.771575927734375, 3.634258270263672, 36.287010192871094, 75.01800537109375, 52.90039825439453, 34.45432662963867, 12.504348754882812, 4.7884674072265625, 58.59881591796875, 89.58920288085938, 31.38245391845703, 24.66745376586914, 20.736526489257812, 11.981512069702148, 0.3260688781738281, -6.289592742919922, 50.08213806152344, -2.506755828857422, 3.5079116821289062, -14.269783020019531], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000098.npy"}
|
|
{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 28.275169372558594, "std": 33.9051399230957, "min": -23.03955078125, "p10": -3.6146263122558593, "median": 19.392457962036133, "p90": 67.33046417236329, "max": 146.73240661621094, "pos_frac": 0.84375, "sample": [-0.3783550262451172, -3.64593505859375, 5.122983932495117, 19.698368072509766, 3.0297584533691406, 38.48649978637695, 20.62476348876953, 22.82781982421875, 91.51165771484375, 18.884641647338867, 19.0865478515625, 101.43475341796875, 10.907352447509766, -22.863174438476562, 67.93058776855469, -0.41412353515625, 17.270614624023438, 30.9327392578125, 0.16214370727539062, 13.548919677734375, 53.77008056640625, 11.973121643066406, 9.113069534301758, 136.93145751953125, 7.237251281738281, 46.62335205078125, 5.220359802246094, -16.570533752441406, 10.648618698120117, -23.03955078125, -5.365257263183594, 49.60716247558594, 34.66876983642578, 23.517436981201172, 53.18238830566406, 146.73240661621094, 65.82737731933594, 29.050411224365234, 16.521011352539062, 16.987884521484375, 4.422386169433594, 25.68658447265625, 30.72954559326172, 65.93017578125, 90.28845977783203, 3.8017120361328125, 12.162288665771484, 59.060333251953125, -3.5415725708007812, 3.121641159057617, 30.399093627929688, 71.82978820800781, 26.635082244873047, 10.78676986694336, 46.87860107421875, -4.410545349121094, 37.10666275024414, 14.794425964355469, 55.04571533203125, 50.627662658691406, 21.131824493408203, 40.861961364746094, 2.8028907775878906, -13.33599853515625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000099.npy"}
|
|
{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 17.573741912841797, "std": 29.567665100097656, "min": -54.370697021484375, "p10": -13.945474052429196, "median": 16.69904327392578, "p90": 56.326676177978534, "max": 121.40524291992188, "pos_frac": 0.765625, "sample": [48.16150665283203, 28.601316452026367, -5.452600479125977, 13.880462646484375, 20.010791778564453, -1.439056396484375, 30.116125106811523, 18.10928726196289, 7.840444564819336, 26.60260009765625, 10.77691650390625, 0.7351570129394531, 31.25958251953125, 20.861968994140625, 3.1598644256591797, 68.8577880859375, 17.97906494140625, -36.290435791015625, -18.167007446289062, -36.910552978515625, -11.236505508422852, -6.722957611083984, 1.7225894927978516, -49.382083892822266, 51.8939094543457, 21.043182373046875, 44.31242370605469, 21.487747192382812, 0.9943027496337891, 5.56123161315918, 37.061012268066406, -0.03656005859375, 41.206634521484375, 2.866100311279297, 59.34843444824219, 27.514297485351562, 7.688924789428711, 9.838798522949219, 52.080528259277344, 19.102615356445312, 34.80051040649414, 13.823974609375, 9.58349609375, 1.110208511352539, 26.504058837890625, 30.95867919921875, -0.8759918212890625, -22.856033325195312, -5.5606842041015625, 60.520965576171875, 31.33538055419922, 62.624290466308594, 22.40934944152832, -54.370697021484375, 121.40524291992188, -6.396097183227539, 15.419021606445312, 67.732666015625, -15.106460571289062, 58.146453857421875, 37.131561279296875, 5.806175231933594, 31.142677307128906, 14.392864227294922], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000100.npy"}
|
|
{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 20.904651641845703, "std": 32.66230392456055, "min": -38.74854278564453, "p10": -14.42895927429199, "median": 17.317583084106445, "p90": 69.42059020996099, "max": 121.53903198242188, "pos_frac": 0.703125, "sample": [74.85050964355469, 121.53903198242188, 26.802383422851562, 25.90790557861328, -8.807632446289062, 85.11493682861328, 90.49856567382812, 6.857574462890625, 78.76914978027344, 89.59419250488281, -21.975616455078125, 22.567642211914062, -28.048866271972656, 43.27888488769531, 12.27861213684082, -5.108192443847656, -12.950386047363281, 12.57697868347168, 48.922584533691406, 26.582908630371094, -8.201568603515625, 19.959732055664062, 43.65986633300781, 49.747642517089844, 18.96319580078125, 16.65747833251953, 6.845241546630859, 54.84453582763672, 93.17413330078125, 19.658676147460938, 0.8193206787109375, -5.578086853027344, -11.968242645263672, 43.55281448364258, 17.41817283630371, 31.749465942382812, 56.75077819824219, 36.45292663574219, -5.3397674560546875, 6.487880706787109, -2.0584049224853516, -15.062633514404297, 17.19398307800293, -1.0875701904296875, 5.3250579833984375, -10.972639083862305, 26.49944305419922, 26.97344970703125, 7.398355484008789, 21.1794490814209, -38.74854278564453, 33.321693420410156, 9.765380859375, -21.793434143066406, 23.703052520751953, 17.21699333190918, -7.7841796875, 32.44858932495117, 4.89202880859375, -1.4372444152832031, 44.916229248046875, -21.65597152709961, 33.47460174560547, -20.715301513671875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000101.npy"}
|
|
{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 15.713181495666504, "std": 30.38352394104004, "min": -54.4715576171875, "p10": -16.752603912353514, "median": 12.35986328125, "p90": 44.560192871093754, "max": 117.3099365234375, "pos_frac": 0.6875, "sample": [2.5433692932128906, 39.24651336669922, 42.59117889404297, 7.541229248046875, 10.205398559570312, 4.833320617675781, -16.45001220703125, -34.474151611328125, 21.448209762573242, -51.387115478515625, 27.034942626953125, 69.90213012695312, -5.6187896728515625, 32.931060791015625, 22.530532836914062, -6.528331756591797, 10.075973510742188, 30.28335952758789, 40.16905212402344, 0.6849403381347656, -6.0769805908203125, -3.598421096801758, 36.420326232910156, -3.5791072845458984, 44.314697265625, 19.133399963378906, 32.87223815917969, 117.3099365234375, -1.5811614990234375, 0.9027824401855469, 89.4306411743164, -0.27340126037597656, 33.87888717651367, 37.02055358886719, 3.0267581939697266, -11.177894592285156, 25.182401657104492, -54.4715576171875, 0.696533203125, -21.71076202392578, 0.43881797790527344, -30.614212036132812, -16.882286071777344, 1.391357421875, 44.6654052734375, 19.31402587890625, 33.70954895019531, 68.36331176757812, 36.95068359375, 14.514328002929688, 58.76142120361328, -17.119094848632812, -3.32452392578125, 9.265850067138672, 48.274436950683594, -11.23504638671875, 33.340721130371094, 27.791915893554688, 20.989999771118164, 33.165733337402344, -9.619514465332031, -13.585647583007812, 32.92803192138672, 38.875667572021484], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000102.npy"}
|
|
{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 30.00634002685547, "std": 32.49298095703125, "min": -19.36090087890625, "p10": -5.985634613037109, "median": 25.251285552978516, "p90": 68.34756240844727, "max": 138.30850219726562, "pos_frac": 0.8125, "sample": [-2.0896682739257812, 8.734329223632812, 12.736143112182617, 24.888626098632812, -6.07403564453125, -18.656578063964844, 32.83101272583008, 34.00763702392578, 10.871450424194336, 17.835857391357422, 25.15813446044922, 37.010040283203125, 11.606590270996094, 21.834983825683594, 0.05248260498046875, 59.580322265625, 90.79910278320312, 30.782333374023438, 41.800437927246094, 46.03675079345703, -15.330978393554688, -3.19293212890625, 37.95237731933594, 32.13993835449219, -19.36090087890625, 60.549766540527344, 30.49024200439453, -13.493049621582031, 9.577072143554688, 20.23865509033203, 27.188194274902344, 123.96173095703125, 19.557472229003906, 23.657711029052734, 57.3336181640625, 49.24256134033203, -5.779365539550781, -11.720787048339844, 18.298328399658203, 17.71849250793457, -2.3753433227539062, 32.86316680908203, 48.65081024169922, -9.12420654296875, 18.455535888671875, 88.9631118774414, 66.50436401367188, 25.344436645507812, 138.30850219726562, 82.94783020019531, 90.3520736694336, 63.33006286621094, 13.421394348144531, 32.068031311035156, 60.01513671875, -1.7223167419433594, 29.660245895385742, 3.358163833618164, 4.177623748779297, 45.17493438720703, 69.13750457763672, 16.250207901000977, 26.357177734375, 39.513214111328125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000103.npy"}
|
|
{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 28.46316909790039, "std": 37.551666259765625, "min": -47.095916748046875, "p10": -9.291133308410641, "median": 20.036930084228516, "p90": 75.32354125976563, "max": 143.6201171875, "pos_frac": 0.796875, "sample": [72.32426452636719, -2.9128494262695312, 11.184417724609375, 10.56024169921875, 52.363037109375, -0.3439483642578125, -19.90483856201172, 2.161785125732422, 3.5022201538085938, 33.22422790527344, -10.337564468383789, 26.492996215820312, 124.33840942382812, 46.21355438232422, 69.4819107055664, -0.49793243408203125, 76.05545043945312, 30.43718147277832, -10.907493591308594, 94.13595581054688, 28.114700317382812, 10.208984375, 20.92599105834961, 7.595939636230469, 12.667924880981445, 2.7993907928466797, 143.6201171875, -29.88721466064453, 47.40007019042969, 108.31039428710938, 112.58445739746094, 25.087631225585938, 10.941835403442383, 51.026634216308594, 10.756797790527344, 10.116409301757812, -6.849460601806641, -14.943031311035156, 86.44140625, 15.441230773925781, 46.08062744140625, 3.6206207275390625, 59.79393005371094, 2.3988571166992188, 73.61575317382812, 36.16724395751953, 29.056297302246094, 39.82125473022461, 25.528228759765625, 32.10960388183594, 3.0351181030273438, 71.00128173828125, 9.726097106933594, 21.872581481933594, 56.53587341308594, 57.36144256591797, -4.297719955444336, 26.053442001342773, -0.1927013397216797, 19.147869110107422, -10.790351867675781, 1.8804473876953125, -47.095916748046875, 9.281684875488281], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000104.npy"}
|
|
{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 39.720542907714844, "std": 33.44864273071289, "min": -37.03321838378906, "p10": 3.2831264495849632, "median": 36.281681060791016, "p90": 81.85192871093751, "max": 151.2127227783203, "pos_frac": 0.90625, "sample": [36.56243896484375, 29.478683471679688, 42.424888610839844, 28.939865112304688, 37.96788024902344, 34.97838592529297, 60.70024108886719, 95.66669464111328, 10.902746200561523, -37.03321838378906, -1.4775314331054688, 69.18569946289062, 63.09283447265625, 13.013086318969727, 33.23768615722656, 41.23793411254883, -26.273902893066406, 5.317893981933594, -0.294158935546875, 88.65975189208984, 28.584152221679688, 89.96034240722656, 73.57049560546875, 16.25823974609375, 9.53082275390625, 68.31307220458984, 53.56880187988281, 35.26239776611328, 8.169036865234375, 36.00092315673828, 50.41532897949219, 127.498046875, 15.108863830566406, 43.47545623779297, 13.383491516113281, 18.738082885742188, 57.22754669189453, 76.71260070800781, 38.67210388183594, 15.35373306274414, 44.47527313232422, 84.0517578125, 31.018239974975586, 33.18107604980469, 27.949447631835938, 45.210235595703125, -0.06098365783691406, 57.29998779296875, 61.081058502197266, 76.718994140625, 28.37445068359375, 21.429542541503906, -11.877182006835938, 6.958766937255859, 51.60038757324219, 51.16656494140625, 45.890724182128906, 87.448974609375, 50.384368896484375, 151.2127227783203, 18.80945587158203, 2.411083221435547, 11.704708099365234, 63.58387756347656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000105.npy"}
|
|
{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 24.999935150146484, "std": 28.44190788269043, "min": -41.59678649902344, "p10": -7.478995513916013, "median": 24.707901000976562, "p90": 60.81417999267579, "max": 116.1808853149414, "pos_frac": 0.828125, "sample": [23.827194213867188, 3.826131820678711, 6.023721694946289, -1.7391777038574219, 17.288660049438477, 0.9685230255126953, 41.955535888671875, -4.995429992675781, 29.370506286621094, 57.01091384887695, 33.635650634765625, 31.888565063476562, 3.473033905029297, 65.1743392944336, 21.832656860351562, 70.00796508789062, 3.3881969451904297, 12.782936096191406, 59.48744201660156, -0.05572319030761719, 56.43926239013672, 33.291561126708984, 4.027824401855469, 66.20285034179688, 19.524232864379883, 35.463966369628906, 10.400520324707031, -13.9241943359375, 39.822654724121094, -22.129783630371094, 37.908836364746094, 15.610572814941406, 32.21847915649414, 36.56296157836914, -0.07668495178222656, 7.005683898925781, 21.1632080078125, 6.274505615234375, 72.57220458984375, 59.41284942626953, 46.19502258300781, 22.807525634765625, -41.59678649902344, 2.0879058837890625, 116.1808853149414, 28.292953491210938, 25.588607788085938, 33.38391876220703, 21.105224609375, 6.4916534423828125, 1.1936759948730469, 45.25990295410156, 85.72220611572266, 32.03199005126953, 43.812408447265625, 44.48503112792969, -13.761314392089844, 28.310699462890625, -8.543380737304688, 28.383209228515625, 61.382781982421875, -27.08061981201172, -13.756370544433594, 39.09562683105469], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000106.npy"}
|
|
{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 28.321508407592773, "std": 33.06311798095703, "min": -61.98193359375, "p10": -8.305241966247552, "median": 18.323246955871582, "p90": 72.02211303710938, "max": 110.70191955566406, "pos_frac": 0.875, "sample": [66.1512451171875, 84.55262756347656, 39.782806396484375, 59.6849365234375, 20.686866760253906, 25.478668212890625, 42.156524658203125, -1.9917850494384766, -61.98193359375, 6.418216705322266, 13.875429153442383, 62.891998291015625, -22.955108642578125, 26.790576934814453, 9.451828002929688, 9.027351379394531, 12.571672439575195, 73.03421020507812, 1.0675430297851562, -19.146846771240234, 81.52875518798828, 59.46995544433594, 64.37921142578125, 4.332021713256836, -11.011009216308594, 4.745351791381836, 2.217517852783203, 6.065521240234375, 110.70191955566406, 57.542823791503906, 19.757997512817383, 16.88849639892578, 74.06110382080078, 33.59355163574219, 54.18780517578125, 76.05059051513672, 14.632436752319336, 25.67090606689453, 12.518987655639648, -13.236869812011719, 52.819915771484375, -34.729713439941406, 53.87329864501953, 65.52821350097656, 12.738716125488281, 69.66055297851562, 3.8419113159179688, 73.97994995117188, 46.82612609863281, 46.411766052246094, 15.613906860351562, 16.24229621887207, 11.08642578125, -18.498985290527344, 5.975006103515625, 5.5806732177734375, 11.504989624023438, 60.618377685546875, 13.041961669921875, 23.460054397583008, 50.70367431640625, 69.48226928710938, 3.850677490234375, 11.320632934570312], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000107.npy"}
|
|
{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 32.215179443359375, "std": 43.8306884765625, "min": -41.12022399902344, "p10": -17.951074790954586, "median": 23.89415740966797, "p90": 96.56323242187501, "max": 156.31951904296875, "pos_frac": 0.765625, "sample": [31.119888305664062, 103.88706970214844, 34.1348876953125, -30.61041259765625, -41.12022399902344, 78.18521118164062, 27.405977249145508, 7.860076904296875, 156.31951904296875, -1.6885604858398438, -4.6751251220703125, 103.51432037353516, 7.14208984375, 2.6286354064941406, -2.3782806396484375, 55.593833923339844, -4.278064727783203, 71.50813293457031, 121.48658752441406, 88.89363861083984, 93.31851196289062, 64.73789978027344, 0.33307838439941406, -18.964204788208008, 36.79771423339844, 9.9346923828125, 23.43771743774414, 13.796627044677734, 67.23077392578125, 6.221439361572266, 17.601806640625, -0.2698345184326172, 62.41876983642578, -15.587104797363281, 137.81109619140625, -22.8052978515625, 67.19117736816406, -23.081127166748047, -6.698051452636719, 3.995485305786133, 23.80408477783203, 9.2421875, 38.69145202636719, 114.43228149414062, 26.50739288330078, 7.972446441650391, 70.17346954345703, 56.85997009277344, 24.383630752563477, -37.35700988769531, 9.403656005859375, 45.468055725097656, 97.95382690429688, 48.74163055419922, 2.2797775268554688, -1.5690135955810547, 37.62519073486328, 15.558605194091797, 60.91094970703125, -38.53178024291992, 23.984230041503906, 40.37361526489258, 45.171478271484375, 17.341087341308594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000108.npy"}
|
|
{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 35.33580017089844, "std": 37.015541076660156, "min": -38.97137451171875, "p10": -6.521308135986326, "median": 31.567869186401367, "p90": 84.64405670166016, "max": 137.67724609375, "pos_frac": 0.828125, "sample": [43.98033905029297, 6.137035369873047, 95.34007263183594, 68.01947784423828, 19.31658172607422, 45.6922492980957, -32.30491638183594, 12.63677978515625, 15.065086364746094, 45.077964782714844, 100.57794189453125, 33.811702728271484, 56.64063262939453, 36.431365966796875, 48.21485900878906, 6.629751205444336, 20.7595272064209, 33.805999755859375, -8.392684936523438, -4.515617370605469, -2.797588348388672, 137.67724609375, -0.13987159729003906, -7.380889892578125, 25.054540634155273, 3.5804824829101562, 102.00747680664062, 28.48712158203125, 21.827802658081055, -25.150279998779297, 48.70307922363281, 54.80955505371094, 7.286411285400391, 49.12266540527344, 47.577423095703125, 33.61775207519531, 75.45309448242188, 58.783958435058594, 83.53778839111328, 85.11817169189453, 5.080436706542969, 9.732284545898438, -38.97137451171875, 25.102500915527344, 2.367340087890625, 61.24517059326172, 31.802627563476562, 32.3516731262207, 19.124223709106445, 70.96548461914062, -29.129486083984375, 28.654022216796875, 31.333110809326172, 88.39744567871094, 102.14619445800781, 25.163108825683594, 73.4934310913086, 26.5198974609375, 72.68844604492188, 82.30167388916016, -3.11444091796875, -10.469039916992188, 5.621698379516602, 78.98474884033203], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000109.npy"}
|
|
{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 31.90929412841797, "std": 40.52793502807617, "min": -72.98228454589844, "p10": -5.067333126068114, "median": 26.097037315368652, "p90": 82.87039947509767, "max": 142.71920776367188, "pos_frac": 0.828125, "sample": [83.90556335449219, 37.77574157714844, 22.91977310180664, 46.38941955566406, 46.05113983154297, 31.47521209716797, 40.03688430786133, 10.896636962890625, -72.98228454589844, 36.76549530029297, 52.558807373046875, 15.5787353515625, -35.533599853515625, 15.771087646484375, 13.928474426269531, -0.24219322204589844, 26.096078872680664, 136.26931762695312, 88.65744018554688, 22.815475463867188, 3.901803970336914, -3.5261192321777344, 142.71920776367188, 101.44480895996094, 80.45501708984375, 74.35395812988281, 12.175971984863281, -2.1053237915039062, -5.545692443847656, 53.42826843261719, -27.713821411132812, -12.932125091552734, 7.879602432250977, 16.85283660888672, 5.662174224853516, 122.25350952148438, 41.425453186035156, 72.43748474121094, 78.474853515625, 3.3088245391845703, 59.44947814941406, -3.9511613845825195, 26.529373168945312, 34.392024993896484, 14.747509002685547, 19.756378173828125, 65.18401336669922, -44.85699462890625, 57.199249267578125, -15.13702392578125, 29.615737915039062, 2.2751617431640625, 41.983238220214844, 26.09799575805664, 43.04428482055664, 35.381614685058594, 18.23362922668457, 22.165740966796875, 15.651477813720703, 42.11730194091797, 53.74855041503906, 1.8148670196533203, 2.246734619140625, 110.4217529296875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000110.npy"}
|
|
{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 35.16822814941406, "std": 43.93067169189453, "min": -61.68951416015625, "p10": -16.18607482910156, "median": 28.415714263916016, "p90": 90.92248611450196, "max": 154.84954833984375, "pos_frac": 0.78125, "sample": [154.84954833984375, -30.6837158203125, -61.68951416015625, 83.90249633789062, -6.2723388671875, 41.52678680419922, 9.966781616210938, 27.734390258789062, 21.347530364990234, 110.95936584472656, 26.466079711914062, 65.68099975585938, 34.53838348388672, 43.105560302734375, 24.279151916503906, 74.44168090820312, 92.76302337646484, -14.202072143554688, 78.54658508300781, 8.257776260375977, 59.30268096923828, -17.036361694335938, -12.999439239501953, -4.1326904296875, 52.36891174316406, 86.12376403808594, 0.987884521484375, 85.40792846679688, -22.208114624023438, 10.366920471191406, -10.643768310546875, 27.634084701538086, 97.91738891601562, 18.183853149414062, 115.29385375976562, 38.654083251953125, 25.55986785888672, 42.11616134643555, 54.209068298339844, 107.54088592529297, 19.735801696777344, -26.852523803710938, 16.394128799438477, 23.447643280029297, 110.447998046875, 47.106201171875, 27.67668914794922, 39.41424560546875, 84.72055053710938, 1.7419719696044922, 86.62789916992188, 33.127079010009766, 73.0366439819336, 62.89491271972656, 39.876007080078125, 9.498659133911133, -51.74884033203125, 29.09703826904297, 21.915559768676758, 36.859519958496094, -4.465375900268555, 73.80157470703125, -37.60523986816406, -6.146995544433594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000111.npy"}
|
|
{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 23.074462890625, "std": 31.793039321899414, "min": -48.330848693847656, "p10": -16.33640365600586, "median": 19.884469032287598, "p90": 71.89582672119143, "max": 97.03948974609375, "pos_frac": 0.765625, "sample": [33.67824172973633, 97.03948974609375, 63.2288818359375, -4.944972991943359, -29.53017807006836, 24.960643768310547, 28.809524536132812, 20.596450805664062, 16.442047119140625, -6.856849670410156, 83.04586791992188, 67.21270751953125, 1.7612934112548828, -0.5723114013671875, 73.90287780761719, 53.86101150512695, 34.58292007446289, 46.99324035644531, 36.47104263305664, 29.552169799804688, 38.08343505859375, 33.205650329589844, 55.48571014404297, -6.185089111328125, 21.923233032226562, 21.402219772338867, 26.861488342285156, 20.014354705810547, -18.294906616210938, 16.633617401123047, 4.417074203491211, -24.72092056274414, 22.83881378173828, 86.03243255615234, 91.43212127685547, -21.352783203125, 34.01045227050781, 13.043401718139648, 54.43617248535156, 3.2871952056884766, -48.330848693847656, 80.07469940185547, 16.30384063720703, 9.485710144042969, -5.295989990234375, 18.866924285888672, 46.36614990234375, 2.373981475830078, 13.910186767578125, 76.07968139648438, 15.014686584472656, 6.904834747314453, 38.55485534667969, -23.565216064453125, 8.176149368286133, -15.796562194824219, -16.567764282226562, -4.6233978271484375, 59.60663604736328, 1.111532211303711, 19.75458335876465, 6.039207458496094, -4.690757751464844, 34.22478485107422], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000112.npy"}
|
|
{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 25.518264770507812, "std": 28.999862670898438, "min": -23.60161590576172, "p10": -9.850836181640624, "median": 27.795425415039062, "p90": 65.70827407836914, "max": 91.60674285888672, "pos_frac": 0.78125, "sample": [-23.60161590576172, 58.09165954589844, -12.595954895019531, 66.45181274414062, 12.979179382324219, 27.664337158203125, 17.14093780517578, 34.13683319091797, 7.919809341430664, -2.1075592041015625, 7.565223693847656, 60.77276611328125, 45.04802703857422, 82.21649169921875, 29.372713088989258, -14.229619979858398, 0.43157196044921875, -5.039375305175781, 11.76055908203125, 61.365745544433594, -14.457290649414062, 37.2540283203125, 63.18890380859375, -10.385833740234375, 43.19886016845703, 1.8087005615234375, 7.080644607543945, 91.60674285888672, 35.89373016357422, -23.066635131835938, 13.859283447265625, 67.12136840820312, 29.129215240478516, -8.602508544921875, 32.74493408203125, 46.28888702392578, 11.622817993164062, 65.80208587646484, 27.926513671875, -3.4021835327148438, 43.072776794433594, 58.043548583984375, -8.13294792175293, 2.869710922241211, -16.525375366210938, 38.42068862915039, 29.102989196777344, 32.766746520996094, -8.014312744140625, 9.998987197875977, 84.77479553222656, 0.937591552734375, 29.913162231445312, 35.0030517578125, 75.7682113647461, 65.4893798828125, 30.6307373046875, 56.280067443847656, -3.434915542602539, 10.140426635742188, 10.883331298828125, 11.850818634033203, 23.923877716064453, 39.44970703125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000113.npy"}
|
|
{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 39.156715393066406, "std": 49.000816345214844, "min": -24.39351463317871, "p10": -14.741216659545897, "median": 37.90251541137695, "p90": 86.21461868286137, "max": 260.87823486328125, "pos_frac": 0.765625, "sample": [134.89584350585938, 76.7435073852539, 47.102203369140625, 50.56090545654297, 21.017440795898438, 99.90968322753906, 15.271825790405273, 38.0081787109375, -15.481552124023438, 50.337364196777344, -24.39351463317871, 50.598052978515625, 42.48613739013672, 10.112709045410156, -23.68255615234375, 63.85935974121094, 10.71120834350586, -16.913253784179688, 28.43689727783203, 90.27366638183594, 14.282163619995117, 40.7412109375, -9.827728271484375, 91.62419891357422, 40.62847900390625, 52.95752716064453, 25.002357482910156, 20.85723876953125, 25.26446533203125, 51.936275482177734, 54.43302917480469, -2.7852020263671875, -5.001312255859375, 51.124664306640625, -13.01376724243164, 11.985052108764648, 34.906036376953125, 48.64459228515625, 51.11647033691406, 74.17552185058594, 51.50618362426758, 71.64202117919922, 4.065757751464844, 38.47217559814453, 28.672161102294922, -5.470268249511719, 188.0873565673828, 5.1373138427734375, 37.796852111816406, 118.87650299072266, -2.9128856658935547, 62.516632080078125, -21.13823699951172, -16.317214965820312, 260.87823486328125, 52.751033782958984, -2.482820510864258, 61.31983947753906, 37.22605895996094, -23.124622344970703, 19.958168029785156, 73.32545471191406, -7.575496673583984, 63.91423797607422], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000114.npy"}
|
|
{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 27.70241355895996, "std": 48.197357177734375, "min": -100.52423095703125, "p10": -14.767607116699217, "median": 17.78662109375, "p90": 102.97779769897465, "max": 183.52822875976562, "pos_frac": 0.765625, "sample": [3.033649444580078, -2.9097366333007812, 107.58647155761719, 42.236358642578125, -24.7275390625, 1.9905319213867188, -24.811508178710938, 10.62664794921875, 7.846744537353516, 16.63813018798828, 18.34215545654297, -7.4795074462890625, 39.45193099975586, 45.54853057861328, 19.823740005493164, 128.9022979736328, 21.523771286010742, -1.802520751953125, 183.52822875976562, -9.399961471557617, 52.43749237060547, 3.3360977172851562, 107.30253601074219, 58.381103515625, 128.51156616210938, -13.972160339355469, 24.892745971679688, 26.87057876586914, 5.560573577880859, 69.8153076171875, 128.9619598388672, 72.88140869140625, 7.775291442871094, -88.13553619384766, 8.538986206054688, 32.00708770751953, 45.26216125488281, 14.942451477050781, 13.33880615234375, 36.70203399658203, 24.840682983398438, 34.50367736816406, 27.99024200439453, 22.688201904296875, 110.81192016601562, -13.29217529296875, 17.23108673095703, 12.451591491699219, -4.5265655517578125, 9.030776977539062, 17.124393463134766, 61.03291320800781, 2.7560653686523438, 19.53237533569336, -15.899890899658203, 92.8867416381836, 56.2215576171875, -9.112955093383789, 53.72111511230469, -100.52423095703125, 75.375, -23.883384704589844, 5.744928359985352, -15.108512878417969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000115.npy"}
|
|
{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 29.48697280883789, "std": 34.467803955078125, "min": -21.82828140258789, "p10": -7.9784214019775375, "median": 23.20874309539795, "p90": 75.95429687500001, "max": 174.19700622558594, "pos_frac": 0.796875, "sample": [7.607412338256836, 2.1763572692871094, -21.82828140258789, -14.063568115234375, 21.968202590942383, 34.5205078125, 41.32684326171875, 20.112083435058594, 40.769195556640625, 47.3526611328125, 20.08763885498047, -0.9774150848388672, 34.024505615234375, 24.624801635742188, 6.959369659423828, -5.521425247192383, 42.385589599609375, -17.322120666503906, 64.24327087402344, 74.41606140136719, 29.848602294921875, -1.0892486572265625, 18.127670288085938, 10.587562561035156, 39.389404296875, 19.84832763671875, 47.95135498046875, 84.9637451171875, 21.08586883544922, -18.666282653808594, 93.7867431640625, 32.3741455078125, 19.92022705078125, 30.398788452148438, -8.610368728637695, -16.32825469970703, 10.792125701904297, 12.290611267089844, 4.673063278198242, 81.590576171875, 5.119993209838867, 92.41484069824219, 25.215042114257812, 63.231842041015625, 55.90370178222656, 20.788127899169922, 49.936927795410156, -14.510513305664062, -6.503877639770508, 61.325050354003906, 24.449283599853516, -5.676006317138672, 174.19700622558594, 51.03227233886719, 14.6728515625, 37.957359313964844, 47.621124267578125, 47.65340042114258, -5.097131729125977, 12.328857421875, 1.4865303039550781, 41.01617431640625, 80.19363403320312, 76.61354064941406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000116.npy"}
|
|
{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 25.843429565429688, "std": 37.76962661743164, "min": -68.26296997070312, "p10": -9.238124084472652, "median": 22.436553955078125, "p90": 70.52203369140628, "max": 148.67520141601562, "pos_frac": 0.78125, "sample": [14.32979965209961, 42.260169982910156, 37.36341857910156, -3.709636688232422, 19.365285873413086, 3.0613632202148438, 19.698392868041992, 24.813385009765625, 15.79583740234375, 31.6768856048584, 38.855892181396484, 53.75093078613281, 95.52154541015625, 107.27813720703125, 23.209190368652344, 31.504196166992188, -33.44694519042969, 60.914398193359375, 24.709259033203125, -63.092742919921875, -18.543533325195312, 7.887947082519531, 16.424816131591797, 50.436771392822266, 39.39534378051758, -2.479644775390625, -10.851150512695312, 2.697509765625, 25.258743286132812, -5.474395751953125, 31.138038635253906, 99.29934692382812, -4.6209564208984375, 18.49249267578125, 108.36568450927734, 14.360382080078125, 148.67520141601562, 15.747299194335938, 20.708290100097656, 49.916168212890625, 10.043331146240234, 2.4031333923339844, 6.906040191650391, -3.146209716796875, -68.26296997070312, 21.663917541503906, 64.31874084472656, 27.975387573242188, 79.85721588134766, 30.855222702026367, -27.720687866210938, -5.221210479736328, 6.894573211669922, 45.673709869384766, 73.18058776855469, 61.1932373046875, 7.40211296081543, 39.51764678955078, 48.98596954345703, 26.80478286743164, 29.751693725585938, -1.189300537109375, -22.77044677734375, 48.169960021972656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000117.npy"}
|
|
{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 29.027183532714844, "std": 34.56831359863281, "min": -30.910903930664062, "p10": -4.980347442626953, "median": 20.406079292297363, "p90": 78.31969757080081, "max": 123.07159423828125, "pos_frac": 0.8125, "sample": [-4.8310089111328125, 16.862163543701172, 2.735445022583008, -18.045562744140625, 18.779621124267578, 14.951000213623047, 54.45893478393555, 39.12084197998047, 41.82001495361328, 81.12638854980469, 65.79093933105469, 19.749664306640625, 93.28047180175781, 25.50249481201172, -5.044349670410156, 44.22654724121094, -5.748485565185547, -4.4418792724609375, 26.918659210205078, 123.07159423828125, 86.13165283203125, -5.92790412902832, -30.910903930664062, 3.5814895629882812, -27.49444580078125, 18.88457679748535, 2.717559814453125, 25.26413917541504, 4.773868560791016, 9.182708740234375, 11.888999938964844, 52.583091735839844, -4.4655303955078125, 63.6953125, 19.86467742919922, 37.32647705078125, 20.947481155395508, 29.177978515625, 111.63360595703125, 1.3433685302734375, 19.69860076904297, -11.060779571533203, 1.2685012817382812, 30.805892944335938, 106.305908203125, 60.11634063720703, 103.658935546875, 37.04533386230469, 63.74292755126953, 12.145265579223633, 2.3863296508789062, 49.79773712158203, 71.770751953125, 53.71715545654297, 39.76488494873047, 25.2625732421875, 30.972341537475586, 12.992431640625, 1.6013813018798828, 23.125572204589844, -3.5347213745117188, 66.61917877197266, 1.63641357421875, -2.5808029174804688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000118.npy"}
|
|
{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 29.030445098876953, "std": 41.049774169921875, "min": -43.888153076171875, "p10": -20.287868499755852, "median": 25.80705451965332, "p90": 74.94660644531254, "max": 155.4345703125, "pos_frac": 0.734375, "sample": [18.337182998657227, 37.684539794921875, 23.822311401367188, 108.10302734375, 38.509613037109375, -14.121078491210938, -4.514823913574219, 40.15613555908203, -5.636974334716797, 48.70682907104492, -0.8043975830078125, -3.1494598388671875, 155.4345703125, 43.169639587402344, 9.481021881103516, 93.96482849121094, 21.188465118408203, 10.604633331298828, 146.70005798339844, 39.831939697265625, 61.459564208984375, 59.82530975341797, 59.408287048339844, -0.4934959411621094, 24.91730499267578, 32.224449157714844, -24.76964569091797, 12.328075408935547, 44.59602355957031, 44.7164306640625, 4.0069732666015625, -5.8293609619140625, 78.65693664550781, 26.69680404663086, 123.39566040039062, -9.308738708496094, 47.51378631591797, 48.08708190917969, 2.300111770629883, 19.121662139892578, -26.202880859375, 31.18433380126953, -33.82452392578125, 99.9858169555664, 34.1124267578125, 47.75251770019531, -7.4476776123046875, -23.163726806640625, 65.7672348022461, 33.41404342651367, -22.93077850341797, -43.888153076171875, 7.258140563964844, 59.482147216796875, 22.004348754882812, 45.68974685668945, 39.83440399169922, 8.549787521362305, 66.28916931152344, -3.8156566619873047, -34.059600830078125, 3.2192840576171875, 27.841140747070312, 4.575675964355469], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000119.npy"}
|
|
{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 39.31489562988281, "std": 43.7486457824707, "min": -50.50935363769531, "p10": -11.686404800415032, "median": 33.8778076171875, "p90": 103.7399627685547, "max": 142.73983764648438, "pos_frac": 0.875, "sample": [120.22406005859375, 63.24745178222656, 29.635345458984375, 89.88446044921875, 31.288333892822266, 12.348295211791992, -36.29078674316406, 84.28733825683594, 18.900602340698242, 54.660728454589844, 50.286460876464844, 125.11479187011719, 63.362388610839844, 18.13874053955078, 15.57345962524414, 18.43218994140625, 97.46035766601562, 39.09748077392578, 52.066734313964844, -26.00371551513672, -31.680374145507812, 105.32701110839844, 34.00480651855469, 33.75080871582031, 35.18596649169922, 28.292282104492188, 34.208587646484375, 8.773059844970703, 117.37823486328125, -50.50935363769531, -14.802627563476562, 41.38673400878906, 28.99793243408203, 3.6821746826171875, 79.91636657714844, 5.346963882446289, 142.73983764648438, 47.00387954711914, 53.595497131347656, 2.15582275390625, 5.905185699462891, 0.118865966796875, 49.90666961669922, 124.871337890625, 28.435958862304688, 38.151123046875, 100.03684997558594, 142.244384765625, 28.430953979492188, 37.76853942871094, 0.9322280883789062, 65.71404266357422, -25.458663940429688, 18.4130859375, 43.92028045654297, 19.643753051757812, 1.8944816589355469, 82.58360290527344, 51.90299987792969, -4.415218353271484, 61.507118225097656, -15.643180847167969, 29.24542236328125, 3.5753173828125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000120.npy"}
|
|
{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 43.52017593383789, "std": 51.80488204956055, "min": -82.1026611328125, "p10": -6.903961944580077, "median": 41.13285827636719, "p90": 117.65652770996097, "max": 166.94476318359375, "pos_frac": 0.828125, "sample": [5.764923095703125, 1.7399826049804688, 0.387939453125, 165.4700469970703, -4.860830307006836, 8.111412048339844, 71.2642822265625, -10.79983139038086, 69.92266845703125, 54.074623107910156, 5.5435028076171875, 51.55480194091797, 109.88816833496094, 92.68326568603516, 58.57643127441406, -6.157173156738281, 63.97654724121094, 56.25590896606445, 48.77777862548828, -24.542387008666992, 29.098421096801758, 99.44473266601562, -36.572288513183594, 91.38914489746094, 131.54788208007812, 45.28352355957031, 26.882186889648438, 9.874462127685547, 91.40625, 4.171901702880859, 58.273834228515625, -15.423393249511719, 44.94657897949219, 55.54975891113281, 4.921499252319336, 141.49130249023438, 6.4764862060546875, 137.84698486328125, 33.00749969482422, 163.40301513671875, 3.9472579956054688, 103.1648941040039, -7.2240142822265625, 9.174758911132812, -10.92510986328125, 26.220909118652344, 65.78730773925781, 14.649406433105469, 5.2400970458984375, 42.84226989746094, 6.71142578125, 14.0396728515625, 51.19999694824219, 94.941650390625, 120.98582458496094, 61.10283660888672, -82.1026611328125, 43.20623779296875, -4.250152587890625, 64.59465026855469, 166.94476318359375, 17.894750595092773, -2.9310169219970703, 39.42344665527344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000121.npy"}
|
|
{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 53.68198776245117, "std": 48.700286865234375, "min": -54.0966796875, "p10": 3.671397972106936, "median": 44.95051956176758, "p90": 116.43431625366212, "max": 173.92881774902344, "pos_frac": 0.921875, "sample": [109.28819274902344, 14.51434326171875, 50.51683807373047, 67.94380950927734, 17.23292350769043, 24.519058227539062, 53.163787841796875, 77.98326873779297, 22.30022430419922, 87.30717468261719, 12.754533767700195, 93.9979019165039, 96.44731903076172, 172.7149658203125, 85.83812713623047, 32.609107971191406, 14.044601440429688, 52.37311553955078, 114.31950378417969, 44.626564025878906, 12.507949829101562, 111.88009643554688, 26.741466522216797, 0.6113128662109375, 50.45414733886719, 19.585052490234375, 47.60089874267578, 109.97894287109375, 61.690895080566406, 57.01013946533203, 62.152259826660156, -18.485820770263672, 135.43902587890625, -5.092729568481445, 41.021339416503906, 113.47735595703125, 52.770263671875, 30.852317810058594, 117.26360321044922, 127.98649597167969, 6.394094467163086, 45.27447509765625, -24.001609802246094, 173.92881774902344, 24.00940704345703, 35.26005935668945, 28.65204620361328, 25.828495025634766, 30.560569763183594, -54.0966796875, -4.5618743896484375, 22.753005981445312, 21.018083572387695, 64.52387237548828, 166.94168090820312, 7.921833038330078, 88.91458129882812, 44.602699279785156, 2.504528045654297, 9.813385009765625, 123.95010375976562, 59.97395324707031, 21.041893005371094, 114.49931335449219], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000122.npy"}
|
|
{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 35.69050216674805, "std": 55.0960693359375, "min": -113.9572525024414, "p10": -17.520436286926266, "median": 24.16555404663086, "p90": 105.55008850097657, "max": 187.16372680664062, "pos_frac": 0.71875, "sample": [58.88671112060547, 64.33533477783203, -6.501014709472656, 6.733058929443359, 56.652587890625, 53.68251037597656, 85.77290344238281, 78.95083618164062, 61.692543029785156, 41.31256103515625, 0.4195404052734375, -0.31749725341796875, 11.300960540771484, -22.44287872314453, 130.87767028808594, 67.18681335449219, 15.35336685180664, -5.009063720703125, -3.4887237548828125, 45.45863342285156, -1.1953277587890625, 102.21823120117188, 44.38505554199219, -55.163177490234375, 21.829750061035156, 67.46868896484375, 187.16372680664062, 19.025510787963867, 106.97802734375, 117.19012451171875, -18.623517990112305, 63.032020568847656, 19.471378326416016, 7.2504425048828125, 71.30995178222656, 96.61643981933594, 26.501358032226562, 42.94499206542969, 142.5473175048828, -14.946578979492188, 64.4339599609375, 59.65885925292969, -32.881832122802734, -38.78456115722656, 60.38648986816406, -0.6959381103515625, -30.766727447509766, -2.5812244415283203, -14.285270690917969, 182.73367309570312, -14.571998596191406, 45.94660949707031, 36.094139099121094, 138.41290283203125, 0.7645492553710938, 19.942834854125977, 96.40501403808594, 30.196252822875977, -113.9572525024414, 4.233299255371094, 5.989128112792969, -5.210491180419922, 0.6649646759033203, 5.203474044799805], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000123.npy"}
|
|
{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 50.780303955078125, "std": 69.63729858398438, "min": -70.37300109863281, "p10": -18.06525993347168, "median": 33.705087661743164, "p90": 132.46677856445316, "max": 294.0536193847656, "pos_frac": 0.828125, "sample": [109.40379333496094, -17.815093994140625, 36.72526550292969, 28.821182250976562, 12.473730087280273, 114.61738586425781, -70.37300109863281, -7.416358947753906, 106.31790161132812, 153.5533447265625, 0.6390609741210938, 26.05266571044922, 4.258668899536133, 51.358192443847656, 49.788063049316406, 191.68545532226562, 116.92315673828125, 39.2863655090332, -4.2154388427734375, 52.750999450683594, -65.51322937011719, -29.012619018554688, 108.12838745117188, 3.9215126037597656, 159.54220581054688, 107.53965759277344, 55.69061279296875, 3.810821533203125, 43.91859436035156, 92.93067932128906, -18.172473907470703, 29.89249038696289, 56.89674377441406, 22.259119033813477, 8.877914428710938, 254.93206787109375, 84.25030517578125, 135.31317138671875, 18.483299255371094, 125.72554016113281, 75.97628021240234, -24.743629455566406, 21.699337005615234, -14.792465209960938, 33.313148498535156, 88.70057678222656, -58.88525390625, 60.26698303222656, 19.333608627319336, 27.012420654296875, 34.09702682495117, 48.112022399902344, -40.8056640625, 171.98898315429688, 21.021743774414062, 294.0536193847656, 24.130279541015625, 125.8251953125, 0.9748268127441406, 26.287208557128906, 40.46717834472656, 55.416595458984375, 11.894207000732422, 14.345268249511719], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000124.npy"}
|
|
{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 46.54182052612305, "std": 54.71077346801758, "min": -49.08257293701172, "p10": -12.552694702148434, "median": 27.031736373901367, "p90": 123.73630523681643, "max": 185.8450927734375, "pos_frac": 0.8125, "sample": [-1.7173652648925781, 98.9063720703125, 60.37290954589844, 13.528091430664062, -2.0758056640625, 8.772029876708984, 84.96604919433594, -14.164138793945312, 116.11930847167969, 19.270612716674805, 6.460487365722656, -22.28268051147461, 22.56583595275879, 11.068195343017578, 79.72896575927734, 74.68161010742188, 59.89286804199219, 46.79657745361328, 13.042427062988281, 8.437164306640625, 97.61761474609375, 16.36102294921875, -24.893402099609375, 8.777130126953125, 75.13890075683594, 185.8450927734375, -29.162155151367188, 27.82099723815918, 71.25357055664062, 22.3046875, 6.1596221923828125, -49.08257293701172, 27.422279357910156, 14.580337524414062, 26.641193389892578, 161.2788543701172, 3.218181610107422, 159.69088745117188, 93.62574768066406, -9.045299530029297, 10.164878845214844, 89.62245178222656, 127.54141235351562, 77.90401458740234, 160.59829711914062, 71.82508850097656, 53.675804138183594, 36.56707000732422, 127.000732421875, 46.50299835205078, 13.347549438476562, 96.37687683105469, 30.057350158691406, -5.755300521850586, 86.24478149414062, 25.618404388427734, -9.218978881835938, 100.51756286621094, 26.319107055664062, -34.68402099609375, 164.42637634277344, 107.59528350830078, -13.981430053710938, 20.48800277709961], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000125.npy"}
|
|
{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 42.55894470214844, "std": 51.13926315307617, "min": -62.190574645996094, "p10": -12.604449462890624, "median": 31.66849136352539, "p90": 125.35473785400391, "max": 169.17898559570312, "pos_frac": 0.859375, "sample": [88.3193359375, 50.8734130859375, 11.250402450561523, 75.14913177490234, 11.860511779785156, 32.872413635253906, -25.227920532226562, 2.1405105590820312, 42.52674865722656, 126.88446044921875, -21.074447631835938, 12.265544891357422, 148.55316162109375, 25.883522033691406, 66.16588592529297, 0.8828659057617188, 99.21463775634766, 33.227989196777344, 92.96128845214844, 47.97377395629883, 39.22422790527344, 43.279117584228516, -4.598535537719727, 27.877628326416016, 48.69731140136719, 132.12503051757812, -12.781631469726562, 121.78538513183594, 29.374378204345703, 40.2633171081543, 18.70656967163086, 20.78223419189453, 47.877166748046875, 15.085365295410156, 137.56430053710938, 38.90895080566406, 51.55474853515625, -52.14891052246094, 94.07232666015625, 2.062816619873047, -28.688812255859375, -12.191024780273438, 169.17898559570312, 3.240449905395508, 21.252111434936523, 8.726490020751953, 4.836917877197266, 16.528968811035156, 97.94920349121094, 85.35221862792969, -62.190574645996094, 160.70565795898438, 64.36160278320312, 17.784820556640625, 25.709381103515625, 68.41178894042969, 30.464569091796875, 57.57813262939453, 16.204307556152344, -24.54125213623047, 10.166526794433594, 75.3749008178711, 12.212028503417969, 142.929931640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000126.npy"}
|
|
{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 33.435821533203125, "std": 44.431488037109375, "min": -69.23280334472656, "p10": -19.754435729980468, "median": 32.418033599853516, "p90": 86.52863159179688, "max": 131.97596740722656, "pos_frac": 0.78125, "sample": [43.833106994628906, 63.901710510253906, -4.250040054321289, 38.87413787841797, 3.673614501953125, -56.97080993652344, 117.84542846679688, 11.85137939453125, 84.45203399658203, -67.98052978515625, 8.289531707763672, 45.05670928955078, 63.63294982910156, -24.614423751831055, 18.08652687072754, 28.834440231323242, 69.1346664428711, 10.943944931030273, -69.23280334472656, 64.92555236816406, 25.589881896972656, 63.48291015625, 98.47993469238281, 20.904754638671875, 58.75349807739258, 125.9122085571289, 25.273338317871094, 60.054443359375, -8.170066833496094, 20.34874725341797, 105.22200775146484, 45.636146545410156, 80.98362731933594, 19.907886505126953, 32.884185791015625, 110.85838317871094, -1.9071998596191406, -12.20452880859375, 21.0063533782959, -1.851278305053711, 14.495960235595703, 87.4186019897461, 7.86724853515625, 52.624061584472656, 36.08442687988281, 54.42778778076172, 39.805564880371094, 82.70465087890625, 79.04454803466797, 46.792633056640625, -31.932647705078125, 35.858978271484375, 17.835567474365234, 131.97596740722656, -43.84880828857422, 42.6693115234375, 25.837684631347656, 82.5833511352539, 39.413330078125, 10.176029205322266, 31.951881408691406, -5.982276916503906, -19.863388061523438, -19.500213623046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000127.npy"}
|
|
{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 34.360923767089844, "std": 53.03660202026367, "min": -116.26608276367188, "p10": -22.239828109741204, "median": 28.02484130859375, "p90": 111.28645172119141, "max": 195.87156677246094, "pos_frac": 0.75, "sample": [34.136234283447266, 35.941261291503906, 105.75508117675781, -13.931190490722656, 82.16816711425781, -30.896549224853516, 88.941650390625, 115.90448760986328, 23.17760467529297, 29.47906494140625, 122.42337036132812, 38.95323181152344, 39.94218444824219, 25.75021743774414, -46.697486877441406, 83.66769409179688, 103.48811340332031, 5.48188591003418, 2.858980178833008, 62.23054122924805, -48.818359375, -2.9074859619140625, 195.87156677246094, -116.26608276367188, 111.7864990234375, 29.181922912597656, 94.62567138671875, 115.47321319580078, 36.746055603027344, 41.3785400390625, -4.114818572998047, -30.742507934570312, 88.8153076171875, 5.025856018066406, -7.144981384277344, 18.9556884765625, -45.61091613769531, 3.363788604736328, 26.867759704589844, 41.824153900146484, 57.21318817138672, -24.522666931152344, 51.55304718017578, -16.913204193115234, -4.270139694213867, 113.67491149902344, 20.072246551513672, 38.76509094238281, 19.681291580200195, 36.58346176147461, 19.27947235107422, -2.606039047241211, 70.64166259765625, -1.2142696380615234, 14.062578201293945, 7.59222412109375, 2.3436813354492188, 45.67955780029297, 72.33317565917969, 5.9475860595703125, -15.97625732421875, 110.11967468261719, 114.67272186279297, 1.3009395599365234], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000128.npy"}
|
|
{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 36.28157424926758, "std": 58.927734375, "min": -97.4530029296875, "p10": -21.949938201904295, "median": 27.696999549865723, "p90": 115.44353256225587, "max": 246.86663818359375, "pos_frac": 0.671875, "sample": [-1.2571163177490234, 28.555042266845703, -30.277606964111328, 88.54325866699219, -30.433156967163086, 72.87844848632812, 11.995697021484375, -18.2232666015625, 128.71823120117188, 25.951095581054688, 7.358268737792969, 77.79116821289062, 22.097347259521484, 36.19587707519531, 46.56415939331055, 58.48728942871094, 116.49897766113281, -14.538841247558594, -4.1571197509765625, 52.85447692871094, 148.34423828125, 69.88487243652344, 151.010986328125, -62.341819763183594, 51.109771728515625, -54.889984130859375, -18.679725646972656, 35.431243896484375, 246.86663818359375, 112.98082733154297, 26.838956832885742, 19.17083740234375, -7.349884033203125, 60.056373596191406, -5.5115203857421875, -1.4548168182373047, 19.587539672851562, 91.451416015625, 18.84032440185547, -11.935585021972656, -43.83644104003906, 1.2567157745361328, 32.55201721191406, 67.56069946289062, 128.50997924804688, -97.4530029296875, 16.544145584106445, 16.365097045898438, -5.023561477661133, -5.2640228271484375, 100.52899169921875, 138.6656951904297, 36.178802490234375, 40.547996520996094, 61.105010986328125, -8.201522827148438, 38.05961608886719, 42.37609100341797, -22.39996337890625, 64.53907012939453, 100.87451171875, -20.899879455566406, 74.5624771118164, -0.14070510864257812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000129.npy"}
|
|
{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 53.77422332763672, "std": 62.204471588134766, "min": -81.75723266601562, "p10": -10.347298431396485, "median": 38.3836669921875, "p90": 145.2161346435547, "max": 232.8638916015625, "pos_frac": 0.796875, "sample": [58.61708450317383, 39.84737014770508, 20.761558532714844, 147.46551513671875, -10.203407287597656, 18.568527221679688, 140.45663452148438, 75.52123260498047, -23.34825897216797, -3.3408946990966797, 17.2771053314209, 40.60886764526367, 33.64218521118164, 126.50314331054688, 163.62826538085938, 17.190933227539062, 80.64515686035156, -6.105714797973633, 32.91297149658203, 64.19110870361328, -1.1296501159667969, 23.65423583984375, 2.2011795043945312, 68.85337829589844, 80.64962768554688, 5.362213134765625, 34.81103515625, 83.32280731201172, 49.22954559326172, 142.88381958007812, 116.06492614746094, 150.7115478515625, -21.442161560058594, 19.107574462890625, 109.84063720703125, -81.75723266601562, 29.113990783691406, 24.89940643310547, 27.178417205810547, -14.858467102050781, 54.472862243652344, 43.55476760864258, 66.03108215332031, 185.2590789794922, 131.88360595703125, -7.404413223266602, 232.8638916015625, -22.620704650878906, -3.4262466430664062, 65.93338775634766, 35.49751281738281, 36.12709426879883, 148.56253051757812, -10.408966064453125, 115.01814270019531, -80.4062271118164, 109.63261413574219, 3.7555885314941406, 78.94857788085938, 102.23580932617188, 146.2156982421875, 53.95036315917969, 36.91996383666992, 35.41804504394531], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000130.npy"}
|
|
{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 51.93624496459961, "std": 52.78141784667969, "min": -84.58560180664062, "p10": 2.670402526855469, "median": 39.86367225646973, "p90": 119.76305007934572, "max": 186.576171875, "pos_frac": 0.90625, "sample": [44.580196380615234, 36.40735626220703, 42.32701873779297, 29.605815887451172, 186.576171875, -84.58560180664062, 34.989559173583984, 26.840904235839844, 33.832176208496094, 74.76163482666016, 96.2120361328125, 19.17688751220703, 33.79998779296875, 10.754095077514648, 64.52406311035156, 28.238632202148438, 31.684040069580078, -2.0048255920410156, 102.0869140625, 56.01017379760742, 39.99259948730469, 122.92495727539062, -2.0303497314453125, -8.006536483764648, 40.03404235839844, 29.140792846679688, 75.75215148925781, 8.194011688232422, 146.14617919921875, 34.75335693359375, 36.09828567504883, -53.44212341308594, 117.22005462646484, 6.349384307861328, 170.96969604492188, 3.0160446166992188, 67.11800384521484, 18.808387756347656, 113.76850891113281, 39.734745025634766, 92.59756469726562, 27.126367568969727, 159.71365356445312, 22.583431243896484, -69.95738983154297, 90.05859375, 8.648193359375, 89.13214874267578, 4.048900604248047, 26.396541595458984, 2.5222702026367188, 99.33592224121094, 14.571296691894531, 140.12355041503906, 108.57672119140625, 120.8529052734375, 70.88771057128906, 54.68226623535156, 23.544723510742188, 64.01405334472656, 84.69078826904297, 83.05309295654297, 87.00067138671875, 47.356231689453125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000131.npy"}
|
|
{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 44.557350158691406, "std": 62.085845947265625, "min": -100.23516845703125, "p10": -7.176355934143064, "median": 36.56555366516113, "p90": 111.70812911987305, "max": 266.59075927734375, "pos_frac": 0.84375, "sample": [14.41533088684082, 12.026317596435547, 121.53041076660156, -42.065269470214844, 50.38774108886719, 36.21698760986328, 55.75099563598633, 219.07199096679688, 225.61593627929688, 8.017257690429688, 46.381011962890625, 80.52080535888672, 13.073205947875977, 42.73899841308594, -100.23516845703125, 12.941917419433594, 140.7081298828125, -18.582717895507812, 46.480796813964844, -7.935338973999023, 3.2554702758789062, -5.4053955078125, 94.28730010986328, 63.243263244628906, 12.165580749511719, 8.057548522949219, 4.236167907714844, 84.82469940185547, 4.4126739501953125, 9.015304565429688, -1.3355026245117188, 7.139373779296875, 16.09159278869629, 72.28792572021484, 71.62948608398438, 112.27217864990234, 36.06996154785156, 47.57378387451172, 67.83958435058594, 45.24752426147461, 39.50126647949219, 34.380313873291016, 36.914119720458984, 110.39201354980469, -66.02873229980469, 39.39857864379883, 99.50589752197266, -34.834373474121094, 10.34438705444336, 75.03205871582031, -14.64647102355957, 51.83439636230469, 92.38920593261719, 11.341117858886719, 42.69502258300781, -0.053009033203125, 23.15526580810547, 81.37228393554688, 25.440887451171875, 12.342256546020508, 141.68165588378906, 19.93802261352539, 43.01544189453125, 266.59075927734375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000132.npy"}
|
|
{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 39.45289611816406, "std": 56.10710906982422, "min": -66.27986145019531, "p10": -10.471991729736327, "median": 29.154701232910156, "p90": 107.87334289550783, "max": 197.45480346679688, "pos_frac": 0.796875, "sample": [16.022972106933594, 66.23280334472656, 4.7542572021484375, 76.6256103515625, 9.671466827392578, -22.483428955078125, -0.599945068359375, 23.391624450683594, -51.78190612792969, 180.1068878173828, 42.182403564453125, 62.92485809326172, -2.9090957641601562, 91.75458526611328, -53.450439453125, 13.564910888671875, 56.88665771484375, -3.0856170654296875, 162.54383850097656, 9.440042495727539, 62.367462158203125, 47.20509338378906, -9.459541320800781, 101.888671875, 2.2936935424804688, 83.95506286621094, 36.900733947753906, 63.89042282104492, 63.201210021972656, 37.331298828125, 17.254806518554688, 14.663642883300781, 20.400644302368164, 191.12979125976562, 2.6556243896484375, 46.160186767578125, 54.905540466308594, -37.61761474609375, 155.2879638671875, 28.8404541015625, 42.09120559692383, 29.468948364257812, 38.78477478027344, 197.45480346679688, -66.27986145019531, -6.3186798095703125, 56.4061279296875, 134.39622497558594, 8.76312255859375, 37.968929290771484, 7.08258056640625, 89.66693115234375, -36.216346740722656, 110.43820190429688, 72.53881072998047, -10.905899047851562, 66.09971618652344, 40.200958251953125, 4.5398712158203125, 18.094282150268555, 6.1387939453125, -0.09521865844726562, 8.119405746459961, 11.500007629394531], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000133.npy"}
|
|
{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 43.490806579589844, "std": 50.085044860839844, "min": -30.00310516357422, "p10": -14.95122604370117, "median": 31.41672134399414, "p90": 111.42744064331055, "max": 236.85256958007812, "pos_frac": 0.828125, "sample": [8.908523559570312, -30.00310516357422, -16.427780151367188, 101.73257446289062, 162.88720703125, 25.299949645996094, 23.973987579345703, 28.85300064086914, -8.591804504394531, 40.111366271972656, 90.57771301269531, 17.81570816040039, 10.598505020141602, 9.376823425292969, 46.444313049316406, 30.141342163085938, -16.3242130279541, 34.63938903808594, 62.90495300292969, 15.045467376708984, 5.689861297607422, 10.069869995117188, 66.66683197021484, 27.786460876464844, 136.9376983642578, 27.735946655273438, 91.04425048828125, 86.45268249511719, 43.47306442260742, 15.867790222167969, 33.036643981933594, 111.49105834960938, 3.245077133178711, 33.83548355102539, 119.80706787109375, 95.68225860595703, 111.27899932861328, -19.365812301635742, 70.46195983886719, -3.9564895629882812, 36.1092529296875, -15.597869873046875, 47.88502883911133, 7.96528434753418, 236.85256958007812, 18.34210205078125, 67.65238952636719, 31.755348205566406, 57.03883361816406, -1.2076034545898438, 47.098358154296875, -13.442390441894531, -17.53333282470703, 144.9576416015625, 61.05852508544922, 19.07013702392578, 51.90214920043945, 28.688831329345703, 31.078094482421875, 59.42211151123047, -24.237014770507812, 63.21413803100586, 24.28542709350586, 115.84902954101562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000134.npy"}
|
|
{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 56.80242919921875, "std": 67.67213439941406, "min": -62.967220306396484, "p10": -16.545652389526364, "median": 38.30820655822754, "p90": 154.8112762451172, "max": 258.34136962890625, "pos_frac": 0.828125, "sample": [29.878807067871094, 89.07095336914062, 26.986568450927734, 167.74661254882812, 77.6907958984375, 62.17803192138672, 197.0045928955078, 36.44255447387695, 180.46084594726562, 164.94813537597656, 110.00457763671875, 48.58497619628906, 144.95880126953125, 105.51036071777344, 200.36065673828125, 89.25881958007812, 28.80084991455078, 150.23458862304688, 11.139472961425781, -20.516191482543945, 3.7522926330566406, 98.37361145019531, 111.180419921875, 64.75691223144531, 27.873075485229492, 83.97638702392578, 11.226638793945312, 258.34136962890625, -17.474597930908203, 94.220458984375, -34.30487060546875, 156.70602416992188, 150.39019775390625, -62.967220306396484, 32.2877082824707, -10.169513702392578, 23.756629943847656, 47.87220764160156, 84.80569458007812, 2.277740478515625, -43.783203125, 28.136428833007812, 68.232666015625, 33.26713562011719, 5.017948150634766, 42.91577911376953, 9.994014739990234, -5.8802032470703125, 40.173858642578125, 14.986452102661133, 24.850696563720703, 77.39070129394531, 71.52641296386719, 68.08953857421875, 4.087471008300781, 68.907958984375, 35.191009521484375, 144.01956176757812, 14.812721252441406, -58.0633544921875, -11.224395751953125, -14.37811279296875, -19.075149536132812, 8.533515930175781], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000135.npy"}
|
|
{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 39.24884033203125, "std": 65.80789184570312, "min": -44.214599609375, "p10": -30.567157745361325, "median": 21.151023864746094, "p90": 97.5030258178711, "max": 298.37554931640625, "pos_frac": 0.75, "sample": [61.77824783325195, 49.79450225830078, 68.27133178710938, 71.67852783203125, -2.9518585205078125, -36.0999870300293, 227.8988800048828, 107.38360595703125, -16.421356201171875, 49.73390579223633, 77.19978332519531, 48.690677642822266, 7.9598236083984375, 13.014759063720703, 66.01002502441406, 39.19938659667969, 62.387168884277344, 24.445457458496094, 95.92628479003906, 230.72366333007812, -4.5194244384765625, 65.15022277832031, 298.37554931640625, 10.86627197265625, 85.1859130859375, 0.5938644409179688, 160.22344970703125, 1.9483528137207031, 173.62451171875, 17.572202682495117, 13.103599548339844, 6.486705780029297, 50.75678253173828, -44.214599609375, 34.89167785644531, -32.186737060546875, 17.856590270996094, -12.527824401855469, 62.35301208496094, 65.38602447509766, 14.876733779907227, -13.304031372070312, -11.50048828125, 54.627662658691406, 1.9303340911865234, 0.9487972259521484, -11.489803314208984, 31.27145004272461, 69.27409362792969, 89.13129425048828, -26.78813934326172, -34.42937469482422, 33.418243408203125, 46.964561462402344, -34.48109436035156, 6.4674835205078125, 40.67436218261719, -12.958065032958984, -34.83865737915039, 15.88201904296875, 3.4466896057128906, 98.17877197265625, 5.978462219238281, -38.90449523925781], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000136.npy"}
|
|
{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 60.80854797363281, "std": 82.70492553710938, "min": -117.55088806152344, "p10": -12.478021812438964, "median": 37.100589752197266, "p90": 185.06976318359392, "max": 297.50811767578125, "pos_frac": 0.859375, "sample": [-5.6559295654296875, 17.95926284790039, 36.91304016113281, 90.79674530029297, 117.55821990966797, 21.00879669189453, 34.866058349609375, 63.277099609375, -12.172069549560547, 90.90992736816406, 4.331722259521484, 79.20166015625, 297.50811767578125, 13.817426681518555, 32.033843994140625, -75.15963745117188, 85.92672729492188, 16.33727264404297, 29.536161422729492, 130.56190490722656, 141.65809631347656, 66.14415740966797, 3.3037147521972656, 270.1207275390625, 85.44731140136719, 246.8979949951172, 144.55230712890625, 79.33448791503906, 13.860984802246094, 144.175537109375, -63.99909973144531, 18.23956298828125, 56.018882751464844, 11.606254577636719, 110.23910522460938, 5.7935943603515625, 109.56355285644531, 48.295623779296875, 2.8893508911132812, 37.28813934326172, 52.6253662109375, 33.789939880371094, 12.432197570800781, -117.55088806152344, -28.33417510986328, -47.45635223388672, 202.43438720703125, -14.30501937866211, 30.995994567871094, 250.69998168945312, 42.117759704589844, 99.08038330078125, 4.58929443359375, 83.25201416015625, -12.60914421081543, 15.385114669799805, 39.880950927734375, 7.118450164794922, 2.3408374786376953, 85.00272369384766, 228.26475524902344, 10.104965209960938, 102.65497589111328, 208.2459716796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000137.npy"}
|
|
{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 45.769859313964844, "std": 54.02177429199219, "min": -86.38348388671875, "p10": -8.660871887207032, "median": 39.73653602600098, "p90": 115.69703598022463, "max": 207.85595703125, "pos_frac": 0.828125, "sample": [121.98812866210938, -8.498016357421875, 64.77336120605469, 103.86859130859375, 14.513221740722656, 77.77257537841797, -4.3519134521484375, -1.1735420227050781, 1.3640289306640625, 122.36531066894531, 27.749683380126953, 18.329299926757812, 45.40028381347656, 9.15043830871582, 22.325468063354492, -86.38348388671875, 14.206247329711914, -15.572795867919922, 0.5373382568359375, 2.6436080932617188, 140.32537841796875, 139.341796875, 11.040843963623047, 6.214515686035156, 103.8916015625, 104.09129333496094, 79.09728240966797, 4.844413757324219, 76.2984390258789, 90.93098449707031, 54.873992919921875, 20.367563247680664, 95.95759582519531, 13.09271240234375, 111.67501068115234, 117.42076110839844, 49.59027099609375, 43.550785064697266, -53.194480895996094, 34.29143524169922, 54.05882263183594, 24.734825134277344, 47.60066223144531, 2.5433788299560547, -1.987701416015625, -36.282928466796875, 85.47383880615234, 107.90523529052734, 14.526897430419922, 35.92228698730469, 85.30201721191406, 26.923776626586914, 102.23963928222656, -31.16138458251953, -23.372285842895508, 49.56631851196289, -8.730667114257812, 54.95707702636719, 12.825111389160156, 207.85595703125, 83.99292755126953, 48.13200378417969, 82.49005889892578, 129.04507446289062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000138.npy"}
|
|
{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 56.984291076660156, "std": 62.937774658203125, "min": -67.83760070800781, "p10": -15.586103820800782, "median": 47.837961196899414, "p90": 123.81383438110353, "max": 268.3444519042969, "pos_frac": 0.84375, "sample": [-17.343994140625, 113.77909088134766, 82.6731185913086, 131.0065460205078, -9.340049743652344, 58.940155029296875, 102.01470184326172, -3.2585105895996094, 59.89311599731445, 120.33617401123047, -67.83760070800781, 47.8743896484375, -27.161792755126953, 201.52935791015625, 115.94580841064453, 43.555686950683594, 77.3302001953125, 41.30841064453125, 30.500946044921875, 47.80153274536133, 116.04991912841797, 57.22209930419922, -40.978492736816406, 109.84845733642578, 36.676788330078125, 1.6348419189453125, 125.30426025390625, 67.82862854003906, 11.397916793823242, 36.80754852294922, 117.13929748535156, 13.405717849731445, 88.07228088378906, 81.6245346069336, 39.229576110839844, 66.53047180175781, 2.5625991821289062, 0.18169784545898438, 4.816583633422852, 14.184837341308594, 52.72045135498047, 2.9091720581054688, 13.039077758789062, 116.52154541015625, 22.506263732910156, 190.53335571289062, 85.93132019042969, -15.7791748046875, 53.11186981201172, -15.135604858398438, 70.62403106689453, -16.17743682861328, 28.10499382019043, 67.72047424316406, 268.3444519042969, 36.14435958862305, 201.76011657714844, 26.740737915039062, 50.84040069580078, -20.685546875, 144.99952697753906, 28.472808837890625, 37.210575103759766, 117.44975280761719], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000139.npy"}
|
|
{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 52.312705993652344, "std": 67.79847717285156, "min": -71.84837341308594, "p10": -22.780825424194337, "median": 47.229610443115234, "p90": 136.75813293457034, "max": 265.48675537109375, "pos_frac": 0.734375, "sample": [-13.914806365966797, 105.00151062011719, 265.48675537109375, -49.04039764404297, -4.0863189697265625, 100.39335632324219, 75.37421417236328, 131.88966369628906, 54.58130645751953, 131.75531005859375, 90.86880493164062, 52.399253845214844, 94.96923828125, 29.90471649169922, 20.846710205078125, 59.695831298828125, 217.14761352539062, 8.607006072998047, -1.0520973205566406, 57.58672332763672, 18.286102294921875, 58.41014862060547, 76.1123046875, 94.03397369384766, 0.8836212158203125, -22.449600219726562, 191.8946075439453, -21.213973999023438, 29.291748046875, 69.68873596191406, 20.804601669311523, 29.440040588378906, 9.036378860473633, 90.46788024902344, -18.252044677734375, 53.659027099609375, 79.53785705566406, 40.95220184326172, -0.931640625, 121.88159942626953, 112.99739837646484, -23.229812622070312, 138.84461975097656, -69.73825073242188, -5.0322418212890625, 56.45927429199219, 42.059967041015625, 73.87388610839844, 153.20858764648438, 15.450698852539062, -10.17344856262207, 129.9784698486328, -11.058382034301758, 27.389923095703125, -71.84837341308594, -22.922779083251953, -43.936439514160156, 33.98908996582031, 154.164306640625, 139.124267578125, -29.65898895263672, 62.22075653076172, 41.49543762207031, 104.40731048583984], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000140.npy"}
|
|
{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 50.01852798461914, "std": 65.95304870605469, "min": -163.45382690429688, "p10": -18.433117675781247, "median": 44.93223190307617, "p90": 136.96706848144532, "max": 197.90786743164062, "pos_frac": 0.8125, "sample": [78.68499755859375, 32.345985412597656, -1.3080406188964844, 84.9780044555664, 94.30021667480469, -16.46149444580078, 68.73081970214844, 46.87815856933594, 6.877113342285156, -3.4550323486328125, 55.66685104370117, 173.2574462890625, -163.45382690429688, 38.567352294921875, -6.700401306152344, 39.43229675292969, 1.930032730102539, -25.23065185546875, 186.1396026611328, -48.57878112792969, 7.479347229003906, 141.54539489746094, 41.9290657043457, 70.31698608398438, 122.60870361328125, 65.4175033569336, 70.79876708984375, 10.267494201660156, 2.140655517578125, 59.89206314086914, 96.89192199707031, 44.13010787963867, 23.647615432739258, 127.99209594726562, 197.90786743164062, 30.659677505493164, 86.87152862548828, 19.671646118164062, -20.643949508666992, 18.645055770874023, -11.174888610839844, 44.82893371582031, -19.278099060058594, 157.92703247070312, 11.389434814453125, 110.68968963623047, 138.61581420898438, 64.308837890625, 11.615274429321289, -89.82388305664062, 17.031471252441406, 78.961669921875, 45.03553009033203, 57.618804931640625, 77.07749938964844, 104.6348648071289, 195.45965576171875, 98.6346206665039, 2.2859554290771484, 133.1199951171875, 53.44158935546875, 73.95913696289062, -32.13142395019531, 16.18817901611328], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000141.npy"}
|
|
{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 57.363826751708984, "std": 79.406005859375, "min": -142.2288818359375, "p10": -20.21634407043457, "median": 33.705806732177734, "p90": 180.10010223388673, "max": 219.735595703125, "pos_frac": 0.765625, "sample": [-10.110565185546875, 175.60110473632812, 15.381818771362305, 31.918685913085938, -14.742546081542969, 184.31532287597656, -18.885128021240234, -15.239400863647461, -55.97575378417969, 29.010089874267578, 96.2279281616211, 92.01589965820312, 127.99481964111328, 25.292654037475586, 38.45153045654297, 162.63807678222656, 9.61690902709961, -142.2288818359375, -72.20223999023438, 56.00243377685547, 36.44779968261719, 37.31965637207031, 16.134136199951172, 219.735595703125, 133.70054626464844, 32.51531982421875, 109.18377685546875, 28.792116165161133, 109.81641387939453, -17.250057220458984, 70.49896240234375, 106.53800964355469, 150.8206329345703, 70.528076171875, -1.5260200500488281, 164.89637756347656, 182.0282440185547, 18.781600952148438, 14.071527481079102, 26.242515563964844, -35.63352966308594, -37.660343170166016, 6.219509124755859, 194.3515167236328, 53.686622619628906, 189.1245574951172, 13.731742858886719, 4.965293884277344, 49.78141784667969, -20.786865234375, 52.2707633972168, -9.608314514160156, 19.176027297973633, -55.86431884765625, 162.18202209472656, 212.80084228515625, 29.479591369628906, 166.712646484375, 14.015121459960938, -3.2104015350341797, 103.05815887451172, 196.73507690429688, 34.89629364013672, 106.50348663330078], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000142.npy"}
|
|
{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 36.23059844970703, "std": 68.4030532836914, "min": -110.98106384277344, "p10": -38.71489944458008, "median": 26.920689582824707, "p90": 118.36672668457032, "max": 263.7055358886719, "pos_frac": 0.640625, "sample": [64.59291076660156, -2.0360107421875, 44.704307556152344, 30.332809448242188, 193.13946533203125, 60.25376892089844, -39.70037078857422, 83.13956451416016, 95.52766418457031, 20.127294540405273, 99.50923156738281, -71.87828063964844, -40.887542724609375, 93.28207397460938, 69.55770111083984, -89.3233642578125, -36.385826110839844, -0.025974273681640625, 79.85855102539062, -9.647598266601562, 22.209224700927734, 175.42568969726562, -48.350807189941406, 81.82131958007812, 21.029197692871094, -13.50381851196289, 43.48717498779297, 119.79874420166016, 27.567840576171875, 20.654705047607422, -12.333969116210938, -36.41546630859375, -28.45855712890625, 7.723672866821289, 99.04611206054688, 18.16667938232422, 29.900772094726562, 50.04139709472656, 22.617706298828125, 263.7055358886719, -11.765693664550781, -7.356470108032227, 33.75202178955078, 79.14249420166016, -0.9480018615722656, 52.354820251464844, 115.02535247802734, 120.8433609008789, -4.827997207641602, -16.39300537109375, -49.511505126953125, 197.3369140625, -19.140052795410156, 61.009437561035156, 54.06819152832031, 26.27353858947754, 83.22337341308594, 28.130592346191406, 134.8494873046875, -110.98106384277344, -0.3235321044921875, 32.34027862548828, -2.223785400390625, 15.606002807617188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000143.npy"}
|
|
{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 47.1068229675293, "std": 57.250274658203125, "min": -77.15445709228516, "p10": -18.497832870483396, "median": 39.17853546142578, "p90": 124.70768661499024, "max": 192.659423828125, "pos_frac": 0.796875, "sample": [-19.332366943359375, 84.93282318115234, 4.127763748168945, 130.43243408203125, -77.15445709228516, -6.663002014160156, 61.580718994140625, 97.97264862060547, 39.65935516357422, 23.452083587646484, 58.24427795410156, -16.550586700439453, 53.80714416503906, 68.65727233886719, 54.333106994628906, 122.9746322631836, 46.000267028808594, 130.53973388671875, 91.07048797607422, 113.34705352783203, 112.9388427734375, 148.60397338867188, -11.266544342041016, -54.80645751953125, 85.38750457763672, -8.437530517578125, 44.9686279296875, 6.235466003417969, 64.77852630615234, 15.865213394165039, -31.597505569458008, 71.58155059814453, 55.35167694091797, 27.95348358154297, -27.37445831298828, 120.49990844726562, 38.697715759277344, 22.11849594116211, 22.780759811401367, 71.2605972290039, 28.014938354492188, 53.008705139160156, 192.659423828125, 30.989686965942383, 25.702171325683594, 64.41746520996094, -28.239612579345703, 24.027267456054688, 29.374984741210938, 90.79490661621094, 92.88408660888672, 184.1913299560547, 2.45294189453125, -10.45794677734375, -1.8121719360351562, 1.1730804443359375, 176.39561462402344, 15.958271026611328, 21.038549423217773, 16.854957580566406, 42.732505798339844, 32.13549041748047, 125.45042419433594, -31.881874084472656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000144.npy"}
|
|
{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 49.61822509765625, "std": 61.6395263671875, "min": -71.71975708007812, "p10": -21.14391365051269, "median": 40.6964111328125, "p90": 119.36565780639648, "max": 227.59063720703125, "pos_frac": 0.78125, "sample": [20.999610900878906, -17.88177490234375, -57.614532470703125, 227.59063720703125, 119.61032104492188, 97.66851043701172, 172.54202270507812, 4.501804351806641, -71.71975708007812, 49.268978118896484, 8.339996337890625, 47.93229675292969, 116.66072082519531, -29.677494049072266, 12.953075408935547, 94.46932220458984, 57.595375061035156, 33.54087829589844, 81.91691589355469, -35.1470947265625, -6.321657180786133, 31.723648071289062, 34.8211669921875, 118.7947769165039, 74.86686706542969, -49.08631896972656, 10.637374877929688, 15.319934844970703, -7.4661865234375, 142.05340576171875, 27.36639976501465, 16.653472900390625, 95.45759582519531, -22.541973114013672, -31.527191162109375, 94.05912780761719, 10.325485229492188, 17.344045639038086, 64.20380401611328, -0.26904296875, 57.29202651977539, 3.843891143798828, 114.13263702392578, -10.577579498291016, 96.67877197265625, 149.99159240722656, 81.89193725585938, -14.843582153320312, 43.03853988647461, 116.7999496459961, 20.844501495361328, 38.35428237915039, 110.47343444824219, 127.88103485107422, 55.85819625854492, 15.293661117553711, -1.7416763305664062, 54.00987243652344, 84.99777221679688, 97.65953826904297, 189.48834228515625, 51.95429611206055, 10.945907592773438, 111.334716796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000145.npy"}
|
|
{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 39.41748046875, "std": 66.41065216064453, "min": -162.31005859375, "p10": -51.42789077758788, "median": 41.4298095703125, "p90": 119.90693969726564, "max": 194.1150360107422, "pos_frac": 0.75, "sample": [34.93651580810547, -17.355133056640625, 97.44607543945312, -53.719970703125, 16.364622116088867, 7.0131988525390625, 76.09783172607422, 54.167022705078125, 60.83927917480469, 42.72755432128906, 160.210205078125, 62.19066619873047, 33.193809509277344, 3.31884765625, 23.889755249023438, 4.473960876464844, 76.18256378173828, 1.6124725341796875, 17.060806274414062, -10.440851211547852, -1.3103408813476562, 26.2381591796875, 61.32109069824219, 109.09408569335938, -63.29438781738281, 57.90339660644531, 116.6829833984375, -162.31005859375, -55.71351623535156, 145.53262329101562, -57.80231475830078, 57.007774353027344, 0.9799404144287109, 63.648345947265625, 168.30645751953125, -54.56055450439453, 43.5634765625, 40.13206481933594, 99.13616180419922, 194.1150360107422, 15.187131881713867, 28.786026000976562, 68.55207824707031, 61.28413391113281, 45.100372314453125, 83.31867218017578, 92.34366607666016, -63.12782287597656, 38.023284912109375, -46.07970428466797, 76.2767333984375, 110.53878021240234, 121.28863525390625, 52.060447692871094, -0.15854644775390625, 57.90418243408203, 23.37383460998535, -19.802528381347656, 70.49981689453125, -36.59032440185547, 181.05018615722656, -24.4826717376709, 140.7713623046875, -32.27854919433594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000146.npy"}
|
|
{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 53.42216491699219, "std": 70.6837158203125, "min": -102.67337036132812, "p10": -15.67233657836914, "median": 42.25372123718262, "p90": 152.07471160888673, "max": 231.24725341796875, "pos_frac": 0.765625, "sample": [108.85955810546875, -48.10702896118164, 44.605926513671875, 190.5843505859375, -15.510597229003906, -3.4644317626953125, 77.1044921875, 86.0645751953125, 33.8582763671875, -21.147544860839844, 24.078283309936523, 65.42439270019531, 52.30603790283203, 108.71932983398438, 26.729595184326172, 168.66741943359375, 96.05165100097656, 22.410459518432617, 0.072113037109375, 145.71615600585938, 151.4932403564453, -0.7727890014648438, 136.43028259277344, 22.32646369934082, 224.81442260742188, 39.282752990722656, 180.34896850585938, 83.74848937988281, -4.929647445678711, 45.267242431640625, 26.113201141357422, -102.67337036132812, 6.456300735473633, 99.70783996582031, 90.69686889648438, -19.85124969482422, 54.97657012939453, 231.24725341796875, 61.326942443847656, 19.859256744384766, 141.32989501953125, 12.812088012695312, -4.522661209106445, 13.907455444335938, 26.108779907226562, 52.695655822753906, -0.4372711181640625, 63.41484451293945, 58.27919006347656, -15.741653442382812, 25.9732666015625, 43.392799377441406, 19.65990447998047, 126.06742858886719, -14.690895080566406, 45.04087448120117, 194.99664306640625, -14.886241912841797, 24.054481506347656, -91.503662109375, 152.32391357421875, -40.706817626953125, 51.44363784790039, 41.11464309692383], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000147.npy"}
|
|
{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 43.061397552490234, "std": 62.59330368041992, "min": -78.87577819824219, "p10": -18.52090911865234, "median": 31.066984176635742, "p90": 115.86283569335939, "max": 252.3341064453125, "pos_frac": 0.828125, "sample": [0.036502838134765625, 41.53290557861328, 5.3408203125, 84.096435546875, -32.371726989746094, 75.43950653076172, 78.65011596679688, 34.188072204589844, -22.764755249023438, 163.76991271972656, 15.971778869628906, 114.47130584716797, 21.26512336730957, 2.3205032348632812, 118.37158203125, 185.60824584960938, 2.1048355102539062, 3.2008514404296875, -25.078643798828125, -43.424072265625, 112.74261474609375, 103.97866821289062, -4.255289077758789, 68.4012451171875, 90.04600524902344, 38.47700500488281, 59.47618103027344, 33.466251373291016, 93.76469421386719, -0.6652374267578125, 116.4592056274414, 23.566802978515625, 30.322267532348633, 48.18512725830078, -19.227256774902344, 33.43710708618164, 21.88353729248047, 21.194534301757812, 91.4820327758789, 32.90568923950195, 109.96334075927734, 31.653362274169922, -78.87577819824219, 30.480606079101562, 33.97267150878906, 20.227401733398438, 125.14767456054688, 34.169097900390625, 8.084941864013672, -16.872764587402344, -67.26858520507812, 31.723785400390625, 11.004884719848633, 8.046926498413086, 32.59668731689453, 6.324104309082031, 24.659576416015625, 252.3341064453125, 239.669677734375, 7.176568984985352, -1.4423084259033203, 10.685646057128906, 78.36148071289062, 5.735773086547852], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000148.npy"}
|
|
{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 54.41447067260742, "std": 62.59941101074219, "min": -129.38462829589844, "p10": 0.04145812988281267, "median": 43.71685791015625, "p90": 124.48325424194336, "max": 251.9634246826172, "pos_frac": 0.890625, "sample": [94.46834564208984, 55.46037292480469, 170.45220947265625, 42.514591217041016, 86.8922348022461, 44.772674560546875, 54.20465850830078, -13.801481246948242, -21.51079559326172, 6.347934722900391, 17.351980209350586, 4.9361724853515625, 55.75947952270508, 37.42362976074219, 42.31982421875, -39.279693603515625, 88.62919616699219, 72.3612060546875, 36.4622802734375, 100.9354476928711, 78.68022155761719, 139.93359375, 89.7980728149414, 23.822912216186523, 13.776174545288086, 74.94935607910156, 38.89336395263672, 104.34517669677734, 68.03425598144531, 100.72491455078125, 58.81547546386719, -0.028865814208984375, 251.9634246826172, 33.18390655517578, 123.70052337646484, 24.169540405273438, 175.43594360351562, 50.89350128173828, 22.982376098632812, 91.15189361572266, 25.448535919189453, 71.34149169921875, 12.522531509399414, 42.661041259765625, -4.154716491699219, 53.066383361816406, 18.605693817138672, 56.690940856933594, 2.730865478515625, -129.38462829589844, 58.65716552734375, 23.04656219482422, 7.0211639404296875, 124.81871032714844, 16.256216049194336, 187.94235229492188, 13.709026336669922, 38.18275451660156, 120.48590087890625, 215.55511474609375, 13.199058532714844, 0.20554733276367188, 73.19943237304688, -61.20708465576172], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000149.npy"}
|
|
{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 52.02049255371094, "std": 72.58626556396484, "min": -65.73249053955078, "p10": -11.06514797210693, "median": 34.68119430541992, "p90": 138.98956756591798, "max": 316.0286865234375, "pos_frac": 0.8125, "sample": [218.96600341796875, 256.3817138671875, 19.322059631347656, 22.105369567871094, 54.64094161987305, -19.645118713378906, 136.970947265625, 34.73930358886719, -65.73249053955078, 38.46721649169922, 316.0286865234375, -12.460153579711914, -1.02569580078125, 14.720909118652344, 3.96746826171875, 97.65367126464844, 24.89495849609375, 45.67047119140625, 45.35435485839844, 53.45337677001953, 33.19386672973633, 57.78363800048828, 131.77005004882812, 26.120399475097656, 96.03846740722656, 73.66569519042969, 157.3353729248047, 106.11775207519531, 17.364349365234375, 64.57066345214844, 9.270881652832031, -38.17057800292969, 64.5631103515625, 139.8546905517578, 36.270751953125, 6.961231231689453, 140.67230224609375, 19.766630172729492, 109.58892059326172, -6.70068359375, 71.94544982910156, -16.192184448242188, -23.28740692138672, -0.0260009765625, 102.6617202758789, 4.726886749267578, 18.044898986816406, 52.13075256347656, 14.537010192871094, 31.993549346923828, 40.093353271484375, 52.64301681518555, 53.11830139160156, 14.948417663574219, 34.623085021972656, 270.9787292480469, 15.283895492553711, 14.565343856811523, 7.010515213012695, 39.9420166015625, 65.75736236572266, -7.8101348876953125, -58.78388977050781, -0.10459518432617188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000150.npy"}
|
|
{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 54.52318572998047, "std": 60.39453887939453, "min": -93.51043701171875, "p10": -17.97140769958496, "median": 44.38295364379883, "p90": 145.6695770263672, "max": 197.2648162841797, "pos_frac": 0.875, "sample": [-93.51043701171875, 197.2648162841797, 45.11665344238281, 2.3353805541992188, 27.083908081054688, 42.007720947265625, 57.91777801513672, 88.20506286621094, 68.26773071289062, 29.988800048828125, 142.32037353515625, -28.162702560424805, -18.27452850341797, 93.62278747558594, 41.781524658203125, -27.26059341430664, 2.991863250732422, 33.484779357910156, 43.64472961425781, 43.649253845214844, 177.76739501953125, 54.15192794799805, 98.17826843261719, 48.127723693847656, -35.345802307128906, 155.98519897460938, 20.101524353027344, 17.582672119140625, -17.26412582397461, 33.32539367675781, 42.77629089355469, 20.57355308532715, 21.27035140991211, 15.597249984741211, 72.554931640625, 147.10494995117188, -53.419349670410156, 63.599151611328125, 81.50358581542969, 79.805419921875, 9.670324325561523, 55.127342224121094, 66.91635131835938, 88.19352722167969, 17.228858947753906, 163.343017578125, 90.61079406738281, 46.49390411376953, 105.2608642578125, 14.055213928222656, 128.42735290527344, 11.748149871826172, 2.7904911041259766, 113.47403717041016, 19.165725708007812, 12.923042297363281, 165.88552856445312, 95.81217956542969, 185.1207275390625, 112.60574340820312, -35.438720703125, 67.4789047241211, 19.28417205810547, 96.85516357421875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000151.npy"}
|
|
{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 62.407257080078125, "std": 71.87776947021484, "min": -57.029754638671875, "p10": -17.91430435180664, "median": 51.072378158569336, "p90": 167.67599334716803, "max": 248.77455139160156, "pos_frac": 0.828125, "sample": [27.4822998046875, 32.88922119140625, 137.93467712402344, 143.90106201171875, 58.90633773803711, 151.7110595703125, 52.71670150756836, 91.1031494140625, 232.6333770751953, 29.44512939453125, 127.9769515991211, 106.73948669433594, 27.858997344970703, -43.46442413330078, 10.820365905761719, 179.863037109375, 107.01764678955078, 33.198524475097656, 59.32267379760742, 38.887481689453125, 52.743797302246094, 39.54656219482422, -42.153770446777344, 61.931671142578125, 7.696559906005859, 248.77455139160156, 101.9678955078125, 180.9706573486328, 35.21690368652344, 15.02850341796875, 62.12025451660156, -57.029754638671875, 29.24819564819336, -3.32000732421875, 2.9987049102783203, 53.38249206542969, 26.715919494628906, 153.641845703125, 30.91358184814453, 36.464935302734375, 228.64431762695312, -8.105781555175781, 49.42805480957031, 127.02249145507812, 54.46257019042969, -12.834320068359375, 197.2360076904297, 27.219343185424805, -49.387489318847656, 88.96934509277344, 16.940134048461914, 87.31266784667969, 55.2486572265625, 38.71454620361328, 173.6906280517578, -31.383216857910156, 117.43087768554688, -55.626953125, 3.2607650756835938, 96.92796325683594, -18.41100311279297, -16.755340576171875, 106.16727447509766, 74.08979034423828], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000152.npy"}
|
|
{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 53.41139221191406, "std": 73.9226303100586, "min": -111.9522705078125, "p10": -23.46257438659668, "median": 38.86238479614258, "p90": 155.7811401367188, "max": 317.01556396484375, "pos_frac": 0.796875, "sample": [74.79708099365234, 17.58721160888672, 14.196640014648438, 0.8729171752929688, 12.098712921142578, 9.264244079589844, 25.84819793701172, 99.91827392578125, -25.7181396484375, 159.66537475585938, 74.6290283203125, 23.71228790283203, -17.007274627685547, 68.69981384277344, 194.19590759277344, -54.87255859375, 78.39794921875, 12.627967834472656, 49.518096923828125, 89.25076293945312, 42.45024108886719, 317.01556396484375, 34.55431365966797, 56.88703918457031, 64.11280822753906, 146.71792602539062, -8.988533020019531, -25.587799072265625, 168.40879821777344, 122.31033325195312, 119.20155334472656, 23.22905731201172, 162.81900024414062, 7.34515380859375, 1.3944587707519531, 3.3500518798828125, 107.38729095458984, 68.56214904785156, -24.653106689453125, -23.83676528930664, 87.58322143554688, 32.87232971191406, 9.98468017578125, 181.44921875, 22.226638793945312, -11.809429168701172, -51.37834930419922, 50.223670959472656, 18.37267303466797, -111.9522705078125, 73.99952697753906, 262.79248046875, 65.0343017578125, -22.589462280273438, 53.725364685058594, -1.4541950225830078, 73.48332214355469, 35.27452850341797, 96.2998275756836, 116.5094985961914, 51.25300216674805, -13.974296569824219, 33.05748748779297, 96.98320007324219], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000153.npy"}
|
|
{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 52.719879150390625, "std": 76.64498901367188, "min": -123.6744384765625, "p10": -35.215512847900385, "median": 41.63508605957031, "p90": 146.2116195678711, "max": 228.66018676757812, "pos_frac": 0.6875, "sample": [-6.415243148803711, 33.103355407714844, -123.6744384765625, 15.88920783996582, 222.19381713867188, 50.16681671142578, -4.734663009643555, 93.25906372070312, 128.894287109375, -31.41964340209961, 62.26703643798828, 185.78245544433594, -49.451751708984375, 92.6878662109375, 99.39468383789062, 56.23093032836914, -0.061248779296875, -0.275390625, 29.478588104248047, -1.2877960205078125, 81.21661376953125, 63.32246780395508, -1.9108619689941406, 51.363040924072266, -11.985176086425781, 138.39947509765625, -19.79721450805664, 145.0260009765625, 116.9690170288086, -44.780242919921875, -10.303970336914062, 148.02313232421875, -44.32286834716797, 139.62545776367188, -36.24903106689453, 160.37855529785156, 228.66018676757812, 138.9537353515625, 3.908966064453125, 139.29978942871094, 146.71974182128906, 98.91290283203125, 18.828975677490234, 64.73504638671875, 141.70289611816406, 0.30804443359375, 2.6014556884765625, 5.943464279174805, -63.65837860107422, 91.377197265625, 113.81173706054688, 3.3917694091796875, -4.179441452026367, 27.85986328125, 8.391220092773438, -62.53981399536133, 92.57456970214844, 103.318359375, 87.04007720947266, -32.80397033691406, -1.8049774169921875, 12.743330001831055, 67.08662414550781, 213.88653564453125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000154.npy"}
|
|
{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 54.10961151123047, "std": 70.516845703125, "min": -152.43373107910156, "p10": -9.935529708862303, "median": 51.54465866088867, "p90": 131.8363677978516, "max": 250.82640075683594, "pos_frac": 0.8125, "sample": [2.9566650390625, 65.27526092529297, 32.02522277832031, -5.12481689453125, 74.87875366210938, 11.839004516601562, 73.29188537597656, -12.209075927734375, 66.3023910522461, 16.87995719909668, -8.343704223632812, 51.7184944152832, 66.57769012451172, -33.6962890625, 73.10506439208984, 64.97589111328125, -2.1256446838378906, 43.776588439941406, 51.63263702392578, 15.094680786132812, 59.71583557128906, 51.45668029785156, 49.722015380859375, 250.82640075683594, 168.60598754882812, 8.560483932495117, 104.31005859375, 89.57649993896484, 19.22435760498047, -45.028533935546875, 16.041383743286133, 1.0016632080078125, 24.564010620117188, 65.9447021484375, 156.51803588867188, -152.43373107910156, 212.65794372558594, 35.12586212158203, 110.51292419433594, -2.5468597412109375, 4.572872161865234, 78.79386901855469, 118.22915649414062, 33.19050598144531, 241.97634887695312, 71.15176391601562, 229.43426513671875, 113.18544006347656, -49.00334167480469, 59.073944091796875, -4.5736083984375, 137.66802978515625, 9.53188705444336, 91.2236099243164, 73.5537109375, -31.993919372558594, 58.65508270263672, 108.52059173583984, 99.26543426513672, 7.443386077880859, 106.84083557128906, 0.08417320251464844, -10.617740631103516, 43.622474670410156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000155.npy"}
|
|
{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 33.31473159790039, "std": 79.74984741210938, "min": -207.93756103515625, "p10": -61.588134002685536, "median": 30.491395950317383, "p90": 112.44249801635742, "max": 212.17758178710938, "pos_frac": 0.734375, "sample": [109.20222473144531, 46.488365173339844, 201.0143280029297, 200.1360321044922, 127.58837890625, 75.04718017578125, -147.09710693359375, 2.5330810546875, -39.77374267578125, 15.075843811035156, -116.44691467285156, 112.48855590820312, 70.6161880493164, -14.804458618164062, -38.28468704223633, -110.5752944946289, 43.831993103027344, 85.76451110839844, 197.52308654785156, 45.6536865234375, 14.35870361328125, 94.65347290039062, 26.19439697265625, 31.576725006103516, 5.888208389282227, -0.3779754638671875, 15.364294052124023, 35.154457092285156, 212.17758178710938, -7.311790466308594, 182.45687866210938, 112.33502960205078, 107.31636810302734, 47.98240661621094, -2.0967254638671875, 3.923959732055664, 50.4306640625, -73.46249389648438, -207.93756103515625, -50.72376251220703, 64.91885375976562, -28.98755645751953, 18.68511962890625, 96.45988464355469, 19.740982055664062, -66.24429321289062, 66.65933990478516, 104.14672088623047, 60.20112609863281, 12.631296157836914, -2.549694061279297, 67.46723937988281, 41.128639221191406, 49.902793884277344, -91.6098861694336, -22.180282592773438, 29.40606689453125, 39.12733459472656, 20.983657836914062, 5.4424285888671875, 25.546371459960938, 58.897186279296875, 6.085308074951172, 92.4000244140625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000156.npy"}
|
|
{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 65.14584350585938, "std": 74.81951904296875, "min": -167.28170776367188, "p10": -17.373123741149893, "median": 58.67639923095703, "p90": 152.28184967041017, "max": 283.581787109375, "pos_frac": 0.84375, "sample": [122.84367370605469, 127.6495590209961, 73.74015808105469, 152.29942321777344, 90.90613555908203, 9.623519897460938, 58.00050354003906, 136.99578857421875, 10.660743713378906, 110.32012939453125, 49.4996223449707, 157.78640747070312, 43.81865692138672, 64.54876708984375, 151.16064453125, 146.17417907714844, 101.41876983642578, 76.54911804199219, 40.57096862792969, 132.39556884765625, 37.60295867919922, 17.586288452148438, 59.352294921875, -21.417556762695312, 90.48243713378906, 160.68849182128906, -3.3568267822265625, 29.24047088623047, 5.40289306640625, 102.03099060058594, -167.28170776367188, -7.936113357543945, 20.383102416992188, 32.23789978027344, 90.17327880859375, 74.7720718383789, 174.862548828125, 93.94883728027344, 86.85360717773438, -22.10633659362793, 16.891849517822266, 239.75076293945312, 26.711179733276367, 30.368606567382812, 5.042942047119141, 158.7303009033203, 89.64817810058594, -23.271535873413086, 152.2408447265625, 89.114990234375, 139.57546997070312, 33.619468688964844, -39.390071868896484, 18.36675262451172, 134.96401977539062, 108.32814025878906, 283.581787109375, 55.036231994628906, 36.42767333984375, -97.38477325439453, 22.569915771484375, -28.740814208984375, 8.858020782470703, -2.1876220703125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000157.npy"}
|
|
{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 67.6415786743164, "std": 95.74718475341797, "min": -81.91231536865234, "p10": -21.63700065612793, "median": 45.069026947021484, "p90": 205.2725906372071, "max": 372.3404846191406, "pos_frac": 0.75, "sample": [12.31027603149414, 9.950904846191406, 51.478004455566406, 42.385040283203125, 43.54167175292969, -18.4771728515625, 23.935302734375, 52.042930603027344, 36.15452194213867, -21.27334213256836, 151.10745239257812, -76.7342529296875, 189.24012756347656, 70.51873016357422, -4.7476806640625, -31.50537109375, -10.893098831176758, 81.33773803710938, 0.6519718170166016, 9.102104187011719, 187.5062255859375, -21.79285430908203, 185.7470703125, -76.4927978515625, 227.89138793945312, 46.59638214111328, 106.31502532958984, 18.007692337036133, 18.850204467773438, 15.231388092041016, 57.21052932739258, 62.461585998535156, -2.6477584838867188, 120.9761962890625, -27.68728256225586, -10.847122192382812, 263.796142578125, 94.6785888671875, -81.91231536865234, 252.8850555419922, 313.4306335449219, 12.447799682617188, 47.62444305419922, 372.3404846191406, 14.788726806640625, 14.6240234375, -28.66930389404297, 86.76167297363281, 138.2830810546875, 65.26323699951172, 278.0323486328125, 139.95668029785156, 88.34814453125, 122.70091247558594, -14.901395797729492, 126.49249267578125, 36.63224792480469, -7.315757751464844, 35.41790008544922, 77.16094207763672, 212.14364624023438, 49.71167755126953, 111.29449462890625, -10.407480239868164], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000158.npy"}
|
|
{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 48.89208221435547, "std": 76.574462890625, "min": -150.05575561523438, "p10": -32.85959014892578, "median": 47.12862777709961, "p90": 152.05342102050784, "max": 258.24920654296875, "pos_frac": 0.765625, "sample": [11.843460083007812, 34.61872100830078, 94.97274780273438, -38.53949737548828, 178.07598876953125, 100.89523315429688, -150.05575561523438, -29.029277801513672, 89.04266357421875, 109.4295654296875, 69.58151245117188, -37.925514221191406, 14.117868423461914, -6.41094970703125, 57.98429870605469, -30.89459991455078, -44.928131103515625, 55.5515022277832, 144.07748413085938, 114.21943664550781, 258.24920654296875, 6.847892761230469, -32.25105285644531, 13.375732421875, 16.545440673828125, -31.88848876953125, 15.479019165039062, -33.120391845703125, 49.686737060546875, 46.308624267578125, -4.110755920410156, 155.4716796875, 46.826332092285156, 8.497180938720703, 75.14041900634766, -18.489004135131836, 52.18373107910156, -43.05030822753906, 3.5884552001953125, 187.69821166992188, 104.948486328125, 166.46450805664062, 51.001800537109375, 62.86284637451172, 9.500602722167969, 216.83731079101562, 11.767951965332031, 0.6814174652099609, 56.26531982421875, 54.826751708984375, 106.98114776611328, 26.435935974121094, -25.905540466308594, -105.04867553710938, 59.42279815673828, 72.38958740234375, 88.76480865478516, 83.43363952636719, 18.888582229614258, 231.61514282226562, 138.33682250976562, 47.43092346191406, 35.517547607421875, 106.05812072753906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000159.npy"}
|
|
{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 63.406612396240234, "std": 93.09505462646484, "min": -92.74552917480469, "p10": -47.50187606811523, "median": 42.17548751831055, "p90": 177.38724975585941, "max": 353.7135314941406, "pos_frac": 0.71875, "sample": [77.55587005615234, 4.100954055786133, 132.0784912109375, 124.43461608886719, -3.041889190673828, -42.503196716308594, 106.14092254638672, 134.4770965576172, 134.99847412109375, 4.643400192260742, -59.823753356933594, 5.7013397216796875, 123.03108215332031, 194.18707275390625, 97.03723907470703, -49.22377014160156, 26.660503387451172, 38.167572021484375, -92.74552917480469, 167.4803009033203, 238.1272430419922, -68.72239685058594, 157.27499389648438, -43.48412322998047, 106.49151611328125, 27.712203979492188, 9.656272888183594, -0.6905364990234375, 353.7135314941406, 56.883941650390625, 14.49981689453125, 52.619606018066406, -0.512451171875, 288.40240478515625, -5.980537414550781, 33.06428527832031, 46.18340301513672, 135.4418182373047, 271.173095703125, 235.34690856933594, 34.08135223388672, 156.9805908203125, -52.007171630859375, 169.33895874023438, 86.04176330566406, 20.172409057617188, -23.49247932434082, 24.182958602905273, 62.07762908935547, 91.20948791503906, -10.105705261230469, 32.473731994628906, 61.246368408203125, -54.30811309814453, 49.75810241699219, 70.02784729003906, -3.679807662963867, -0.4992847442626953, -68.61771392822266, 109.94705200195312, 18.161407470703125, 180.83651733398438, -13.687301635742188, 87.32696533203125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000160.npy"}
|
|
{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 56.16345977783203, "std": 77.2515869140625, "min": -83.05248260498047, "p10": -37.17546081542968, "median": 35.41669845581055, "p90": 165.84041748046877, "max": 246.51748657226562, "pos_frac": 0.765625, "sample": [66.10110473632812, 158.21299743652344, -32.82374572753906, 131.03099060058594, 212.2515869140625, -83.05248260498047, -39.495697021484375, -40.98918151855469, 64.86759948730469, -30.808013916015625, 47.162353515625, 95.5224609375, 67.74105834960938, -55.77342224121094, -2.454570770263672, 88.95053100585938, 228.04481506347656, 66.96601104736328, 109.20682525634766, 200.98484802246094, 24.54222869873047, 42.25822830200195, 12.112472534179688, -23.469947814941406, 37.50372314453125, -6.601932525634766, 92.40425872802734, 25.139694213867188, 223.88821411132812, 246.51748657226562, 95.51750183105469, -6.581659317016602, 14.065441131591797, -20.16242218017578, 161.57321166992188, 94.44681549072266, 5.7435455322265625, -39.04048156738281, 17.655410766601562, 73.34751892089844, 16.815353393554688, 14.596504211425781, -9.8232421875, 31.950393676757812, 33.329673767089844, 28.8565673828125, 167.66921997070312, 64.81383514404297, 12.56109619140625, 91.14127349853516, 150.67271423339844, -67.67473602294922, 11.435333251953125, 155.00723266601562, 28.548324584960938, 27.324371337890625, 26.420948028564453, 79.14582824707031, 83.80111694335938, -48.157081604003906, 31.192432403564453, 191.96572875976562, 92.92588806152344, 57.437355041503906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000161.npy"}
|
|
{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 58.098899841308594, "std": 82.0201187133789, "min": -100.52714538574219, "p10": -21.433411407470704, "median": 45.61793518066406, "p90": 149.95129852294923, "max": 383.3995666503906, "pos_frac": 0.765625, "sample": [-14.553054809570312, 40.68526840209961, 75.28656005859375, 182.7179718017578, 18.5704345703125, 177.97274780273438, 16.815956115722656, 107.64332580566406, 32.23033905029297, -12.697690963745117, -16.360389709472656, 149.8346710205078, 50.58073425292969, -53.198219299316406, -21.518539428710938, -10.221160888671875, 125.88288879394531, 121.37654876708984, 5.71087646484375, 96.3255615234375, 63.98780059814453, 22.232635498046875, 46.54899597167969, 13.240989685058594, 10.451162338256836, 145.5931396484375, 103.61383056640625, -7.5084686279296875, -100.52714538574219, 102.76850891113281, 88.04244232177734, -66.35997009277344, -6.165313720703125, 16.626352310180664, 126.0587387084961, 54.36333084106445, 167.91070556640625, 383.3995666503906, 64.12325286865234, 78.46317291259766, 70.18595123291016, 69.60025787353516, 132.0533905029297, 134.69741821289062, 128.77626037597656, 9.72442626953125, -4.3033905029296875, 12.843437194824219, 86.41067504882812, 27.25507354736328, -23.819808959960938, 150.00128173828125, 2.6045074462890625, -45.09522247314453, 273.954345703125, 35.254356384277344, -80.9269027709961, 18.048812866210938, 52.212989807128906, 73.1468734741211, 44.68687438964844, -21.234779357910156, 33.624755859375, 158.67935180664062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000162.npy"}
|
|
{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 55.70824432373047, "std": 75.24111938476562, "min": -81.2253189086914, "p10": -21.84290084838867, "median": 45.67300796508789, "p90": 166.8511947631836, "max": 245.57154846191406, "pos_frac": 0.75, "sample": [3.6998519897460938, -4.802898406982422, 207.19419860839844, 233.46022033691406, -22.611053466796875, -63.9091796875, 54.34742736816406, -11.161056518554688, -3.9658966064453125, 60.056121826171875, 92.0193099975586, -6.409523010253906, 94.68572235107422, 245.57154846191406, 78.34232330322266, 53.44434356689453, 91.26663970947266, 34.81190490722656, 17.416275024414062, -20.05054473876953, 38.560447692871094, 99.05719757080078, 78.31893920898438, 23.73400115966797, 65.33076477050781, 3.671060562133789, 108.39820861816406, -81.2253189086914, -34.81078338623047, 72.5663070678711, 163.08985900878906, 8.672805786132812, 101.78864288330078, 16.92683219909668, 56.74653625488281, -8.105554580688477, -35.69480895996094, 221.5662078857422, 101.68921661376953, 33.948638916015625, -38.69903564453125, 32.777488708496094, -2.92706298828125, 168.46319580078125, 48.75872802734375, 85.75798797607422, 3.146697998046875, 212.50001525878906, 93.90306091308594, 15.211009979248047, 42.58728790283203, 77.61306762695312, 40.77976989746094, 155.10171508789062, 71.20713806152344, -7.083038330078125, 60.534828186035156, 5.3782806396484375, -67.13447570800781, 3.0110836029052734, 210.43740844726562, 67.429931640625, 122.2495346069336, -3.3119144439697266], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000163.npy"}
|
|
{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 65.75594329833984, "std": 76.00215911865234, "min": -102.38052368164062, "p10": -10.343553924560542, "median": 58.097612380981445, "p90": 141.3953399658203, "max": 315.566650390625, "pos_frac": 0.84375, "sample": [-102.38052368164062, 115.55471801757812, 126.35999298095703, 114.14574432373047, 163.72732543945312, 44.95270538330078, 63.750152587890625, 99.04147338867188, 66.83238220214844, 69.48228454589844, 186.32723999023438, 29.43524169921875, 0.3148975372314453, 47.393463134765625, -12.826416015625, 77.35833740234375, 56.13041687011719, 56.313194274902344, -14.120782852172852, 49.019859313964844, -3.0827808380126953, 92.89862823486328, 63.077301025390625, 120.40194702148438, 37.08326721191406, 93.53939819335938, -0.8803977966308594, -57.492698669433594, 104.6375961303711, 18.894739151000977, 32.61357116699219, -13.262588500976562, 107.44369506835938, 140.002197265625, 107.75399017333984, 96.7593994140625, 27.579673767089844, 92.55597686767578, 30.04515838623047, 127.02629089355469, 27.737014770507812, 67.07038879394531, 50.57903289794922, 41.228363037109375, 40.88641357421875, 0.1803131103515625, 59.88203048706055, 10.400726318359375, -93.98478698730469, 101.82057189941406, 60.957008361816406, 45.347145080566406, 258.0875549316406, 10.917724609375, 284.5682678222656, 186.27682495117188, 6.8268890380859375, 141.99240112304688, 83.17561340332031, -27.573204040527344, -4.550209045410156, 85.70318603515625, 315.566650390625, 0.878692626953125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000164.npy"}
|
|
{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 43.49615478515625, "std": 68.58180236816406, "min": -109.84720611572266, "p10": -42.886096191406246, "median": 35.73799514770508, "p90": 142.26251525878908, "max": 189.19264221191406, "pos_frac": 0.734375, "sample": [66.31163787841797, 68.64606475830078, -43.74864196777344, 16.111549377441406, -40.87348937988281, 1.1000003814697266, 69.0857925415039, 77.30206298828125, -1.3645782470703125, -53.110511779785156, 79.29004669189453, -109.84720611572266, -30.533721923828125, 36.75825500488281, 31.517730712890625, -62.40703582763672, -8.90591812133789, -17.743896484375, 114.5933609008789, 121.39675903320312, 73.54518127441406, 31.05721664428711, 15.56167221069336, 182.12445068359375, -82.67204284667969, 63.393096923828125, -3.4879894256591797, -40.41664123535156, 11.971488952636719, 112.44300842285156, -46.790672302246094, 18.9399471282959, 143.95809936523438, 138.30615234375, 26.1162109375, -22.15601348876953, 68.12946319580078, 23.32872772216797, 52.84014129638672, 6.674295425415039, 35.03346252441406, 45.00079345703125, 90.67829895019531, 63.473793029785156, 171.89154052734375, -2.1150970458984375, 78.42399597167969, 58.8441162109375, 18.48810577392578, -50.20368957519531, 18.484603881835938, 146.4442138671875, 36.442527770996094, -24.83612823486328, 44.4952507019043, 183.0193634033203, 22.424163818359375, 189.19264221191406, 177.0789794921875, 109.77107238769531, 79.83026123046875, 11.267799377441406, 56.290985107421875, 137.8887939453125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000165.npy"}
|
|
{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 77.64604187011719, "std": 100.64749908447266, "min": -46.13615417480469, "p10": -19.024490737915038, "median": 56.554019927978516, "p90": 203.91344604492198, "max": 411.2655029296875, "pos_frac": 0.84375, "sample": [352.521240234375, 228.5935516357422, 18.359439849853516, 355.19384765625, -21.09815216064453, 20.913158416748047, 18.206050872802734, 56.52471923828125, 151.36376953125, 6.15771484375, 87.91352844238281, -4.819305419921875, 5.669683456420898, 118.30496215820312, -19.40918731689453, 58.65007781982422, 112.92617797851562, 32.083099365234375, 23.95699691772461, 128.12557983398438, 10.623163223266602, 411.2655029296875, 28.711666107177734, -30.864116668701172, -46.13615417480469, -36.73270034790039, 35.012306213378906, 94.1600112915039, -43.30708312988281, 172.01229858398438, 215.9226837158203, 331.0379333496094, 90.40370178222656, 72.31312561035156, 20.693695068359375, 287.9569091796875, 12.670234680175781, 137.46234130859375, 59.746185302734375, 97.3399658203125, -2.176959991455078, 69.09347534179688, 15.418937683105469, 24.069061279296875, 158.74378967285156, 97.44755554199219, 175.8918914794922, 12.851360321044922, 65.69973754882812, 109.36703491210938, 27.148052215576172, 79.97911071777344, -40.970863342285156, 46.2918586730957, 5.50225830078125, -18.12686538696289, 98.93470001220703, 8.12152099609375, 79.86138916015625, 8.061264038085938, 83.70297241210938, 13.870626449584961, 143.55258178710938, 56.58332061767578], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000166.npy"}
|
|
{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 82.669921875, "std": 83.33100891113281, "min": -91.95823669433594, "p10": -6.566954040527343, "median": 67.63229751586914, "p90": 192.14661865234385, "max": 293.702880859375, "pos_frac": 0.84375, "sample": [101.0765380859375, 61.062503814697266, 109.02194213867188, 165.6728057861328, 92.16729736328125, 115.72247314453125, 53.0167236328125, 53.572410583496094, 94.1410140991211, 50.4474983215332, 293.702880859375, 152.87503051757812, 61.59552764892578, -6.416168212890625, 18.537689208984375, 49.917781829833984, 65.20384216308594, -34.100181579589844, 100.86546325683594, 144.53736877441406, 27.239389419555664, 33.02208709716797, 54.70263671875, 39.432891845703125, 89.27227783203125, -91.95823669433594, 216.5657501220703, 167.86831665039062, -5.36260986328125, 152.78753662109375, -10.81151008605957, 62.81379699707031, 81.43348693847656, 33.946075439453125, 271.07489013671875, 122.68473815917969, 223.64096069335938, 70.06075286865234, 155.0617218017578, -85.8927001953125, 161.2944793701172, 202.55160522460938, 246.72283935546875, 117.36148834228516, 282.4039611816406, -2.2058792114257812, 137.77981567382812, 81.60665893554688, 155.50094604492188, 144.9654083251953, 79.21769714355469, 21.940231323242188, 15.466272354125977, 27.795452117919922, 33.19770812988281, 113.96297454833984, 76.04503631591797, 58.550872802734375, 16.360946655273438, -60.757835388183594, 35.0669059753418, 11.423479080200195, -8.947463989257812, -6.6315765380859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000167.npy"}
|
|
{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 62.91740417480469, "std": 77.00724029541016, "min": -61.33264923095703, "p10": -23.665044784545895, "median": 44.37376403808594, "p90": 147.5182098388672, "max": 310.3279113769531, "pos_frac": 0.796875, "sample": [22.342910766601562, 81.24971771240234, 26.22713851928711, 20.04989242553711, 6.353445053100586, 31.696903228759766, 118.46228790283203, -20.762775421142578, 28.701618194580078, 4.737335205078125, -61.33264923095703, 138.0239715576172, 310.3279113769531, 72.02484130859375, -24.90887451171875, 148.05618286132812, 133.5755615234375, 171.0172882080078, 146.262939453125, 43.065834045410156, 175.91317749023438, 214.297607421875, 144.190673828125, 125.65767669677734, -18.39307403564453, -39.75859069824219, -0.4366722106933594, 104.37008666992188, 113.00827026367188, 92.6296157836914, 58.865203857421875, 27.091339111328125, -41.07447052001953, 70.57786560058594, 35.669212341308594, 108.43028259277344, -38.484107971191406, -6.80914306640625, -7.700336456298828, 44.421966552734375, 26.477006912231445, 77.33427429199219, 44.3255615234375, 69.80558776855469, 71.87828063964844, 60.63147735595703, 20.718469619750977, -31.969947814941406, 193.61978149414062, 42.50189208984375, 120.06916809082031, 19.024015426635742, 30.70342445373535, 18.565643310546875, 99.92945098876953, 45.52513122558594, 30.909318923950195, -60.03845977783203, 52.40604782104492, -6.088874816894531, 304.0426940917969, 19.47003936767578, 91.57684326171875, 127.65896606445312], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000168.npy"}
|
|
{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 55.930057525634766, "std": 96.64671325683594, "min": -169.24436950683594, "p10": -41.7183609008789, "median": 35.698530197143555, "p90": 171.3262924194336, "max": 416.2378234863281, "pos_frac": 0.765625, "sample": [51.804542541503906, 70.93585205078125, -1.6254539489746094, 18.245758056640625, 152.92945861816406, 96.6787338256836, 18.127120971679688, 100.14186096191406, 67.5406723022461, 27.082530975341797, -11.604244232177734, 49.67969512939453, -169.24436950683594, -0.7631721496582031, 40.80026626586914, -13.446731567382812, 23.492828369140625, -43.75511169433594, 65.16877746582031, 119.26421356201172, 112.28118896484375, -36.9659423828125, 44.232177734375, -49.46187210083008, 172.61058044433594, 168.32962036132812, 56.98456954956055, 18.4576416015625, 12.774765014648438, 24.960172653198242, 166.91854858398438, 6.691017150878906, 17.552404403686523, 54.603416442871094, 77.1712646484375, 103.93215942382812, 179.83123779296875, 15.843351364135742, 68.37979125976562, 167.40199279785156, 10.677316665649414, 341.50262451171875, -69.38253021240234, 34.73078918457031, 27.176612854003906, 41.044227600097656, 24.0494384765625, -47.77790069580078, 273.8699951171875, -3.6955108642578125, 416.2378234863281, -0.3424072265625, 85.26583099365234, 23.366722106933594, 208.7049560546875, 0.573028564453125, 68.45233154296875, 6.513427734375, 36.6662712097168, -8.091949462890625, -90.12315368652344, 76.6429443359375, 185.3910369873047, -105.90949249267578], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000169.npy"}
|
|
{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 67.11624145507812, "std": 78.97372436523438, "min": -140.39157104492188, "p10": -16.400668334960937, "median": 57.961679458618164, "p90": 163.76711425781252, "max": 244.80197143554688, "pos_frac": 0.828125, "sample": [150.1986083984375, -93.26799774169922, 82.95233154296875, 67.09552764892578, 160.69564819335938, 59.23172378540039, 116.19444274902344, 12.00970458984375, 117.80397033691406, -5.7845001220703125, 73.84304809570312, 0.23366928100585938, 63.280029296875, -37.71366882324219, 6.487375259399414, -24.406173706054688, 86.6594009399414, 213.84072875976562, 26.95745849609375, 208.5657958984375, 4.178142547607422, -3.0824718475341797, 131.88710021972656, 42.31295394897461, 1.4618301391601562, 35.636558532714844, -140.39157104492188, -15.388191223144531, 59.00421142578125, 164.79620361328125, 81.88719177246094, 105.79243469238281, 51.331668853759766, 69.9581298828125, 155.80105590820312, -16.83458709716797, 243.83782958984375, 5.187980651855469, 1.4599246978759766, 244.80197143554688, 73.28652954101562, -43.04400634765625, 56.91914749145508, 115.33491516113281, 156.03326416015625, -17.721435546875, 33.98564147949219, 0.11271858215332031, 25.45685577392578, 47.86663055419922, 81.1488037109375, 54.072044372558594, 161.36590576171875, 39.642574310302734, 128.30267333984375, 50.44763946533203, 143.15635681152344, 175.01919555664062, -7.510181427001953, 158.51626586914062, 192.25601196289062, 17.4953556060791, 126.09733581542969, 18.68366241455078], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000170.npy"}
|
|
{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 55.58562469482422, "std": 95.6494369506836, "min": -163.9732208251953, "p10": -54.26613922119141, "median": 53.86281967163086, "p90": 176.09516906738287, "max": 431.25213623046875, "pos_frac": 0.734375, "sample": [-44.014312744140625, 184.46656799316406, 44.55504608154297, 63.475982666015625, -60.49510192871094, -18.612754821777344, 9.486785888671875, 77.37699890136719, -53.54322814941406, 110.01089477539062, 10.10012435913086, 123.29550170898438, 80.02674865722656, 86.01303100585938, 47.11308288574219, -18.16851043701172, 78.36592102050781, -54.81880187988281, 85.7877197265625, -5.412635803222656, 161.74237060546875, 76.56849670410156, 62.98765563964844, 65.84278106689453, 262.5216979980469, 60.134334564208984, -66.63532257080078, 431.25213623046875, -75.42142486572266, 43.95159149169922, 80.73741149902344, -163.9732208251953, 289.95831298828125, 100.13153076171875, 66.42405700683594, -54.575958251953125, 182.24636840820312, 84.75230407714844, 18.866653442382812, 236.61471557617188, 34.229278564453125, 53.922325134277344, 139.21347045898438, -5.434988021850586, -74.79780578613281, 20.238067626953125, 64.28751373291016, 53.803314208984375, -40.282283782958984, 59.55458068847656, -11.730155944824219, -36.9345703125, 106.57738494873047, 38.22276306152344, 60.47918701171875, 0.49139976501464844, 23.593170166015625, 131.03897094726562, -9.866233825683594, 13.338363647460938, 70.47608184814453, 14.603439331054688, 202.44851684570312, 40.872501373291016], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000171.npy"}
|
|
{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 57.24609375, "std": 84.56393432617188, "min": -104.4842529296875, "p10": -37.516748046875, "median": 66.17167663574219, "p90": 150.14990844726566, "max": 351.11663818359375, "pos_frac": 0.6875, "sample": [-15.809226989746094, 96.81791687011719, -98.90386962890625, 77.20648193359375, 21.239990234375, -26.400962829589844, 117.45889282226562, 77.72149658203125, 53.043190002441406, -52.80201721191406, -93.09431457519531, 45.22030258178711, 21.83863067626953, -36.697845458984375, 128.06600952148438, 74.04096221923828, 20.36904525756836, 48.05668640136719, -44.1367301940918, 69.0321273803711, -17.411949157714844, 82.39639282226562, 129.951416015625, 123.40565490722656, 11.979942321777344, 61.277130126953125, 164.87643432617188, 152.97500610351562, -37.867706298828125, 213.34036254882812, -18.985496520996094, 89.29357147216797, 101.80693817138672, 68.30862426757812, 96.49264526367188, 243.00387573242188, 351.11663818359375, 46.0941162109375, 90.15949249267578, 30.28799819946289, -18.247459411621094, -31.415069580078125, 64.77786254882812, 67.88626098632812, 143.55801391601562, -13.779083251953125, -3.6176395416259766, 33.29058074951172, -15.813003540039062, 139.59014892578125, 126.17913818359375, -2.7174835205078125, -21.381576538085938, 90.42015075683594, 87.16449737548828, -104.4842529296875, 67.56549072265625, 86.69943237304688, 192.81475830078125, 69.2550048828125, -12.379753112792969, -64.48892974853516, 180.88970947265625, 137.2154541015625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000172.npy"}
|
|
{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 57.409847259521484, "std": 72.24809265136719, "min": -130.00282287597656, "p10": -11.264738845825189, "median": 44.76909065246582, "p90": 158.0357574462891, "max": 275.67755126953125, "pos_frac": 0.84375, "sample": [36.465911865234375, 61.60333251953125, 41.73277282714844, 6.131446838378906, -2.4444656372070312, 62.10285949707031, 6.261562347412109, 122.23483276367188, 101.85684204101562, 5.461639404296875, 13.346923828125, -26.107215881347656, 13.908613204956055, 11.606956481933594, 63.300987243652344, 206.92872619628906, 51.65422821044922, 50.6214485168457, 53.575416564941406, 150.21990966796875, 85.48118591308594, 59.61009216308594, 79.33524322509766, -34.67967224121094, 44.836814880371094, 31.52674102783203, 275.67755126953125, 22.00560188293457, 135.3121337890625, 161.38540649414062, 51.087242126464844, 20.177091598510742, -36.46238708496094, 113.72416687011719, 89.35693359375, -0.5547943115234375, 102.29500579833984, 41.697021484375, -4.3209991455078125, 192.14825439453125, 70.92768859863281, 132.43685913085938, -14.24062728881836, 30.13903045654297, 4.3941802978515625, 10.772686004638672, 196.74581909179688, 96.86070251464844, -36.2491455078125, 11.70367431640625, 77.00646209716797, 44.70136642456055, 99.32290649414062, -48.7835693359375, -130.00282287597656, 243.58551025390625, 34.275672912597656, 3.294891357421875, 71.52497863769531, 17.673038482666016, 179.94393920898438, 45.636985778808594, 43.174076080322266, 29.284454345703125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000173.npy"}
|
|
{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 65.10839080810547, "std": 89.6675033569336, "min": -96.74903869628906, "p10": -35.03410186767577, "median": 47.842281341552734, "p90": 199.27794494628915, "max": 359.5869445800781, "pos_frac": 0.78125, "sample": [13.408119201660156, 1.649261474609375, -40.08191680908203, -44.78068542480469, 1.952606201171875, 27.929237365722656, -1.0224800109863281, 22.86298942565918, 92.3132553100586, 148.66793823242188, 174.6584014892578, -6.091224670410156, 55.036834716796875, 14.907907485961914, 57.19782257080078, -19.158042907714844, 106.04037475585938, 120.74046325683594, 115.96284484863281, 219.90362548828125, 48.88236999511719, 46.087093353271484, 41.333518981933594, -8.862964630126953, 237.45892333984375, -77.47639465332031, 103.94376373291016, 227.74508666992188, 12.34637451171875, 80.34962463378906, 20.838598251342773, -11.821868896484375, 41.59682083129883, -53.32948684692383, 91.582275390625, -23.25586700439453, 110.24270629882812, 83.01603698730469, -82.53317260742188, 209.8291778564453, 236.87124633789062, 294.81207275390625, 17.91537857055664, 84.14363098144531, -1.4219417572021484, -96.74903869628906, 0.000579833984375, 97.28966522216797, 87.97932434082031, 97.0910415649414, 77.87640380859375, 46.80219268798828, 25.074378967285156, 110.92777252197266, -46.618446350097656, 156.46974182128906, 125.49365234375, 86.89973449707031, 36.84905242919922, 13.398550033569336, 359.5869445800781, 87.11991119384766, 79.23881530761719, 29.816274642944336], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000174.npy"}
|
|
{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 89.68682861328125, "std": 85.41228485107422, "min": -41.569305419921875, "p10": -1.9155673980712886, "median": 72.64694213867188, "p90": 205.64077758789068, "max": 336.85919189453125, "pos_frac": 0.859375, "sample": [12.867963790893555, 30.811111450195312, 49.75196838378906, -41.569305419921875, 77.83271789550781, 97.54901123046875, 188.48150634765625, 65.43199920654297, 21.88071060180664, 135.69728088378906, 255.66184997558594, -40.857906341552734, -1.545745849609375, 247.07122802734375, 73.509765625, 31.3607177734375, 188.29281616210938, 155.4969024658203, 162.60107421875, 141.91009521484375, 142.35794067382812, -30.885475158691406, 174.33343505859375, 336.85919189453125, 27.10553741455078, 92.04999542236328, 276.3337707519531, 44.667579650878906, 118.20486450195312, 65.93978881835938, -2.1096725463867188, 41.316001892089844, 238.52011108398438, 22.92325210571289, 10.597373962402344, 32.208290100097656, -2.0740623474121094, 212.9947509765625, 7.739349365234375, 57.25592041015625, 101.78346252441406, 98.62158203125, 108.59496307373047, 174.7938232421875, -0.39177513122558594, 161.76754760742188, 177.46035766601562, 165.3145294189453, -16.84030532836914, 106.0051498413086, 143.74916076660156, 75.77825164794922, 71.78411865234375, 110.07511138916016, 35.699501037597656, 50.69308090209961, 102.40692138671875, 4.714300155639648, 40.98162078857422, 237.24215698242188, -41.427696228027344, 53.02861022949219, 18.984024047851562, 40.56474304199219], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000175.npy"}
|
|
{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 73.48348236083984, "std": 90.5433120727539, "min": -46.348609924316406, "p10": -18.845152473449705, "median": 55.15455627441406, "p90": 189.19462890625002, "max": 392.83172607421875, "pos_frac": 0.796875, "sample": [24.656219482421875, 24.176605224609375, 72.0999755859375, 144.6421356201172, 60.781551361083984, 12.338809967041016, 14.365333557128906, 234.36422729492188, 14.990341186523438, -4.39276123046875, -21.790050506591797, 334.4493713378906, -19.05970573425293, -29.3358154296875, 184.35867309570312, 62.25749206542969, 1.909912109375, 196.65562438964844, 127.23222351074219, -0.4633331298828125, 3.9411773681640625, 71.31641387939453, 103.69683837890625, -38.45587921142578, 191.26718139648438, -3.6469249725341797, 22.287012100219727, 154.75535583496094, 135.04019165039062, -2.0577926635742188, 88.44490051269531, -38.393646240234375, 104.03695678710938, -46.348609924316406, 31.23193359375, 121.5859603881836, -22.773204803466797, 52.216224670410156, 124.89385223388672, 114.81538391113281, 138.558837890625, 1.0627632141113281, -18.344528198242188, 0.5216789245605469, 126.47303771972656, 110.76960754394531, 113.92388916015625, 58.09288787841797, 125.04832458496094, 243.40234375, 39.900779724121094, -11.923789978027344, 28.982894897460938, 14.08713150024414, 116.99839782714844, 60.55194091796875, 0.8558120727539062, 20.163127899169922, 117.51984405517578, 5.191547393798828, 147.24232482910156, 392.83172607421875, 22.210166931152344, 246.7319793701172], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000176.npy"}
|
|
{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 73.7649917602539, "std": 90.20797729492188, "min": -57.55003356933594, "p10": -17.643434906005858, "median": 43.51518630981445, "p90": 191.4738540649414, "max": 436.5528869628906, "pos_frac": 0.75, "sample": [116.61190795898438, 42.428009033203125, 154.1250457763672, 87.25101470947266, 56.16926574707031, 106.89910888671875, -36.83247375488281, 63.575286865234375, 171.999267578125, 131.66798400878906, 190.36595153808594, 35.09557342529297, 76.83877563476562, 191.94866943359375, 105.16975402832031, 67.21441650390625, 22.273658752441406, 264.9264831542969, 19.07624053955078, 151.4140625, 28.14710807800293, 138.44276428222656, -54.0518798828125, 130.48345947265625, -3.523649215698242, 34.992950439453125, 192.88714599609375, -18.433303833007812, -1.9989776611328125, 17.712692260742188, 42.54572296142578, 229.8848419189453, 110.19155883789062, -7.4583892822265625, 28.31927490234375, 44.484649658203125, -0.6994552612304688, 218.2863006591797, -1.8573017120361328, -22.277198791503906, -57.55003356933594, 107.16438293457031, 1.4842815399169922, 42.43895721435547, -49.862823486328125, 118.77723693847656, 120.66557312011719, 7.140682220458984, -3.1254348754882812, -3.0961837768554688, 153.2692108154297, 31.26573371887207, 77.94622802734375, -15.800407409667969, 132.07760620117188, 27.899280548095703, 27.214914321899414, -18.463056564331055, 136.2962646484375, 22.894882202148438, 436.5528869628906, 233.76914978027344, 80.0576400756836, -12.353927612304688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000177.npy"}
|
|
{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 93.52775573730469, "std": 110.83756256103516, "min": -123.18147277832031, "p10": -21.930609130859363, "median": 77.49381256103516, "p90": 237.592594909668, "max": 462.8205871582031, "pos_frac": 0.8125, "sample": [57.168941497802734, 13.316291809082031, 129.23193359375, 8.59503173828125, 106.65843963623047, 72.3254165649414, 98.79022979736328, 31.599754333496094, 20.038955688476562, 227.76829528808594, -0.5580520629882812, 112.03448486328125, 200.69564819335938, 89.7200927734375, 44.617835998535156, 130.23634338378906, 329.70086669921875, 77.50285339355469, 462.8205871582031, -26.669021606445312, 170.51658630371094, 50.055320739746094, 11.204620361328125, 54.913963317871094, 14.595296859741211, 153.73974609375, 267.9296569824219, -32.242881774902344, 45.902740478515625, -4.076332092285156, -40.27650451660156, 77.48477172851562, 190.39324951171875, 20.580337524414062, 104.78368377685547, 165.2063446044922, 81.06942749023438, -56.3681640625, 59.64751434326172, 195.48080444335938, 25.567283630371094, 92.49209594726562, -123.18147277832031, 358.4839172363281, -6.658428192138672, 128.2308807373047, 91.18585205078125, 131.48593139648438, -28.790441513061523, 363.4870300292969, 30.488000869750977, 109.66455841064453, 241.80300903320312, 0.2244243621826172, 33.769287109375, 158.4006805419922, 79.09382629394531, -10.874313354492188, 48.60209655761719, 184.14285278320312, 263.4605712890625, -62.589263916015625, 165.98471069335938, -4.8314666748046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000178.npy"}
|
|
{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 71.09725952148438, "std": 103.93403625488281, "min": -196.17222595214844, "p10": -47.21429634094238, "median": 73.13211822509766, "p90": 225.24726562500004, "max": 288.8287658691406, "pos_frac": 0.71875, "sample": [95.53069305419922, 135.02037048339844, 147.17727661132812, 288.8287658691406, 50.418853759765625, -62.240997314453125, 151.5006561279297, 38.09440994262695, 0.8726043701171875, 3.955068588256836, 86.99870300292969, 56.81529998779297, 30.319351196289062, 73.63294219970703, -9.780494689941406, 99.44644927978516, -41.792327880859375, 263.00701904296875, -18.629344940185547, -23.640167236328125, 63.82122802734375, -46.78292465209961, 252.80062866210938, 22.718996047973633, 196.77574157714844, 114.37539672851562, 135.6951446533203, 240.0315704345703, 154.1036376953125, 46.275550842285156, 72.76085662841797, 252.30648803710938, 262.00177001953125, 81.67755126953125, 76.41537475585938, 96.88113403320312, -27.32482147216797, -3.0147438049316406, 1.2699508666992188, 73.50337982177734, 73.70214080810547, -27.549118041992188, 73.85092163085938, -48.754852294921875, 133.839599609375, 217.37820434570312, -70.9228744506836, 163.1585235595703, 102.97029113769531, -56.70056915283203, -32.963096618652344, 180.40455627441406, 30.719253540039062, 175.07492065429688, 228.61972045898438, 197.6322784423828, -119.9127197265625, 140.9423828125, -40.918418884277344, -47.399169921875, -196.17222595214844, 54.327484130859375, 22.492673873901367, -35.421852111816406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000179.npy"}
|
|
{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 85.80658721923828, "std": 94.66084289550781, "min": -141.9830780029297, "p10": -9.932262039184566, "median": 80.02901458740234, "p90": 206.79309234619143, "max": 367.30340576171875, "pos_frac": 0.828125, "sample": [85.0574951171875, 144.33544921875, -5.2542572021484375, -5.075405120849609, -62.333580017089844, 367.30340576171875, -11.62313461303711, 45.06309509277344, 91.94488525390625, 66.35316467285156, 2.9291610717773438, 175.7806396484375, -141.9830780029297, 136.2930450439453, 126.32939147949219, 84.24896240234375, 92.1409912109375, 75.80906677246094, 91.62102508544922, 42.61964416503906, 145.2391357421875, -78.14828491210938, 29.757129669189453, 231.77902221679688, 67.6529769897461, 200.19415283203125, 130.36947631835938, 13.987152099609375, 29.55283546447754, 41.0222282409668, 209.68470764160156, 5.65995979309082, 189.98971557617188, 136.37875366210938, 86.79558563232422, -5.10279655456543, 94.00464630126953, 172.30508422851562, 4.438629150390625, 154.2686767578125, 73.48799896240234, 6.596748352050781, 219.90386962890625, 54.370819091796875, -84.97903442382812, 58.6533317565918, 14.853713989257812, 202.01869201660156, 200.22862243652344, 54.39745330810547, 100.21859741210938, 208.83926391601562, -18.8315486907959, 277.9346923828125, 65.22574615478516, -5.9868927001953125, 145.6951904296875, 93.42752075195312, 25.42127227783203, -49.26361846923828, 225.891845703125, 192.73854064941406, 20.66564178466797, 148.72421264648438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000180.npy"}
|
|
{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 54.42787170410156, "std": 85.5937271118164, "min": -143.85110473632812, "p10": -41.2041763305664, "median": 42.08500099182129, "p90": 174.49432525634768, "max": 284.26934814453125, "pos_frac": 0.78125, "sample": [89.9212646484375, -31.790267944335938, 15.492761611938477, 57.80875015258789, 94.13130950927734, -5.838727951049805, 19.24666976928711, 35.037086486816406, 62.480621337890625, 85.11207580566406, 17.08254623413086, 26.532747268676758, 4.490209579467773, 11.586112976074219, -101.21940612792969, 177.74990844726562, 74.37715148925781, -9.683616638183594, 71.07410430908203, -4.417469024658203, 41.23160934448242, -45.23870849609375, -9.599090576171875, 166.89796447753906, -45.87274169921875, 70.63887023925781, 94.16064453125, 234.04530334472656, 66.92877197265625, 37.82673645019531, 111.70811462402344, 61.63677215576172, -132.77915954589844, 42.938392639160156, 25.410079956054688, 54.03400802612305, 9.776912689208984, 127.09317016601562, 136.13714599609375, -47.21315002441406, 25.19591522216797, -143.85110473632812, 189.19166564941406, -6.460941314697266, 235.36207580566406, 137.9724578857422, -103.10993957519531, 54.01591491699219, -6.591033935546875, 103.6539077758789, 188.5545196533203, 284.26934814453125, 113.84259033203125, 29.56118392944336, 112.02157592773438, 1.5769081115722656, 3.980947494506836, 4.70721435546875, 245.54127502441406, 51.393619537353516, 30.23059844970703, 116.53489685058594, 91.80391693115234, 35.05071258544922], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000181.npy"}
|
|
{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 89.52283477783203, "std": 102.2699966430664, "min": -185.6129150390625, "p10": -32.61595230102538, "median": 72.21912384033203, "p90": 219.58730468750002, "max": 287.8734130859375, "pos_frac": 0.8125, "sample": [231.0546875, 63.940673828125, 248.32949829101562, 129.13742065429688, 70.78436279296875, 171.2198944091797, 190.21969604492188, 211.73358154296875, 272.3624267578125, 67.70378112792969, 73.65388488769531, 42.6198844909668, 26.238088607788086, -11.463481903076172, 204.24462890625, -185.6129150390625, 192.04794311523438, 45.82273864746094, 4.967329025268555, 33.32025146484375, 287.8734130859375, -5.916315078735352, 222.95318603515625, 87.18315124511719, 110.49060821533203, -42.389060974121094, 201.54244995117188, 61.95952224731445, 15.16811752319336, 60.453575134277344, 128.1983642578125, 201.75906372070312, -54.887779235839844, -92.33336639404297, 120.53241729736328, 191.76394653320312, 50.99076843261719, 86.2922592163086, 156.126220703125, 198.31219482421875, -36.02666473388672, 60.232757568359375, 38.589576721191406, 115.95811462402344, 95.63201904296875, 138.479736328125, 131.02398681640625, 119.248779296875, -69.14851379394531, 3.920764923095703, 287.4156188964844, 121.16460418701172, 53.21710205078125, -0.18704795837402344, -24.657623291015625, 10.027795791625977, -97.22505950927734, 44.436065673828125, 208.17730712890625, 26.20763397216797, 262.67352294921875, -18.97079849243164, 53.51372528076172, 137.36111450195312], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000182.npy"}
|
|
{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 53.421424865722656, "std": 80.85557556152344, "min": -97.4327621459961, "p10": -46.30292434692382, "median": 49.52614212036133, "p90": 149.2952438354492, "max": 260.110595703125, "pos_frac": 0.796875, "sample": [43.43205261230469, 61.623390197753906, 6.652402877807617, 49.90766906738281, 2.8285140991210938, 149.6551971435547, 115.68153381347656, 113.06989288330078, 57.830108642578125, 39.386756896972656, -33.48637390136719, 96.42440795898438, 50.1776008605957, 15.654253005981445, 18.03265380859375, 69.28919982910156, 260.110595703125, 201.9849853515625, 126.41250610351562, 30.3799991607666, 50.38404083251953, 79.8992919921875, 231.3280792236328, 91.15299987792969, 11.624038696289062, 3.2140026092529297, 65.40847778320312, 40.68793869018555, 16.44683837890625, -93.46589660644531, 148.45535278320312, 71.35678100585938, -38.24999237060547, 9.793685913085938, 119.26555633544922, 16.31146240234375, 44.317264556884766, 97.24175262451172, 119.55736541748047, 213.19534301757812, -80.23068237304688, -17.394134521484375, 250.1956787109375, -49.754180908203125, 34.84668731689453, -25.742919921875, 84.81897735595703, -72.81292724609375, -71.3559341430664, -74.42212677001953, 145.96017456054688, 67.92485046386719, 155.8071746826172, 139.779541015625, 52.65153121948242, 49.144615173339844, 19.007476806640625, -20.41173553466797, 85.52071380615234, 71.08807373046875, -97.4327621459961, -27.510570526123047, 18.0684814453125, 8.253387451171875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000183.npy"}
|
|
{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 53.54302978515625, "std": 81.26734924316406, "min": -86.50550079345703, "p10": -22.143684387207028, "median": 25.577157974243164, "p90": 162.857829284668, "max": 263.9018249511719, "pos_frac": 0.78125, "sample": [14.303558349609375, -60.78217315673828, 53.80365753173828, 73.44072723388672, 157.6803436279297, 113.63555908203125, 0.601226806640625, 9.563369750976562, 121.48554229736328, -9.482650756835938, 260.2891540527344, 241.73001098632812, 46.617210388183594, 142.81893920898438, -15.356376647949219, 142.62484741210938, 25.491348266601562, 144.55715942382812, 85.07501983642578, 199.44602966308594, -9.954580307006836, 228.94863891601562, -45.416259765625, 13.002937316894531, 23.922611236572266, 36.60375213623047, 36.63140869140625, 5.915168762207031, 86.87950134277344, 7.882448196411133, 45.57032775878906, 59.73876953125, 119.37147521972656, 157.6184539794922, -64.27473449707031, 167.09451293945312, 10.106765747070312, -23.355796813964844, -16.236316680908203, 25.662967681884766, 79.21581268310547, -10.740676879882812, 16.98483657836914, 5.308099746704102, 24.365501403808594, 165.07675170898438, 51.703346252441406, 46.85503005981445, -19.31542205810547, -77.39962768554688, -40.43263626098633, 37.84101867675781, 4.882305145263672, 9.084243774414062, -86.50550079345703, 17.835098266601562, 3.1058502197265625, 147.46905517578125, 24.8380126953125, 60.03337097167969, 10.233421325683594, -3.633941650390625, 82.79359436035156, 263.9018249511719], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000184.npy"}
|
|
{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 44.08148956298828, "std": 87.09733581542969, "min": -140.44436645507812, "p10": -39.709678649902344, "median": 23.797234535217285, "p90": 167.93706512451175, "max": 330.3720397949219, "pos_frac": 0.703125, "sample": [-124.9258041381836, -55.1859245300293, -4.663583755493164, 79.41386413574219, 65.47715759277344, 21.00907325744629, -6.006376266479492, 51.87176513671875, 266.36114501953125, 13.590080261230469, 12.43160629272461, 84.84674835205078, 88.45407104492188, 3.205364227294922, 189.1451416015625, 52.966094970703125, -9.866031646728516, 26.58539581298828, 92.84312438964844, -58.4866943359375, -6.496320724487305, 39.816707611083984, 19.58816909790039, 57.76722717285156, 62.31881332397461, 13.655250549316406, -66.249267578125, 28.45836639404297, 59.16770935058594, -75.57699584960938, 70.11468505859375, 6.846263885498047, 93.14588165283203, 78.80619812011719, 8.857574462890625, 276.5986328125, -38.711952209472656, 53.27062225341797, 159.14877319335938, -15.54825210571289, 15.899120330810547, 16.18788719177246, -13.590858459472656, -11.631149291992188, -11.085515975952148, 10.037290573120117, 218.0577392578125, 125.12332153320312, 121.45486450195312, 7.128568649291992, 330.3720397949219, -0.15688323974609375, 31.284927368164062, 50.852352142333984, -8.788284301757812, 171.70347595214844, 31.231765747070312, 100.10856628417969, -140.44436645507812, 178.44088745117188, -40.13727569580078, 54.014060974121094, 3.8029937744140625, -32.694637298583984], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000185.npy"}
|
|
{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 81.53387451171875, "std": 87.4080581665039, "min": -118.47021484375, "p10": -8.876454734802241, "median": 66.46780395507812, "p90": 195.5441589355469, "max": 348.98272705078125, "pos_frac": 0.84375, "sample": [-118.47021484375, 118.22915649414062, 236.95132446289062, 118.03334045410156, 121.10260772705078, 24.738525390625, 68.17706298828125, 34.609439849853516, 190.056396484375, 135.93077087402344, 46.45123291015625, 174.08828735351562, 7.363655090332031, 64.46864318847656, 37.60575485229492, -1.4908790588378906, 72.679443359375, -78.82537841796875, 38.401702880859375, 18.550561904907227, 205.55029296875, 34.6555290222168, 17.533775329589844, 137.37112426757812, 38.669044494628906, 59.70518112182617, 90.09002685546875, 129.30422973632812, 234.02377319335938, 64.758544921875, 159.29913330078125, -10.811914443969727, 122.27409362792969, 49.80267333984375, 33.568336486816406, 108.62353515625, 308.45208740234375, 197.89605712890625, 59.17525100708008, -1.9448966979980469, 124.13370513916016, 28.814462661743164, 68.87743377685547, 49.69915771484375, -11.357025146484375, 315.87847900390625, -4.360382080078125, 77.79480743408203, -14.396331787109375, -13.235565185546875, 348.98272705078125, 135.26345825195312, 55.0572509765625, 128.042724609375, 29.760910034179688, 119.63719940185547, 72.6358642578125, -24.443077087402344, 6.470298767089844, 90.19235229492188, 105.13786315917969, 91.41952514648438, 15.045608520507812, 76.46940612792969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000186.npy"}
|
|
{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 65.3707275390625, "std": 83.92324829101562, "min": -114.72757720947266, "p10": -23.978037643432614, "median": 52.36276435852051, "p90": 167.3078552246094, "max": 312.03912353515625, "pos_frac": 0.796875, "sample": [-114.72757720947266, 185.6524658203125, -12.89824104309082, 52.55934524536133, -66.76028442382812, 146.64132690429688, 72.33867645263672, 101.51876831054688, 45.276649475097656, 48.70070266723633, 63.45293045043945, 69.46031951904297, 258.9652099609375, 56.984825134277344, 75.58528137207031, 155.004638671875, 136.96832275390625, 32.97859191894531, 69.8780517578125, -39.81863784790039, 50.966583251953125, 23.973052978515625, 312.03912353515625, -9.094772338867188, 125.44099426269531, 48.471893310546875, 193.65457153320312, -32.021854400634766, 155.25579833984375, 30.051485061645508, -25.6417236328125, 88.09581756591797, 17.11919403076172, -46.04420471191406, -20.09610366821289, 154.43203735351562, -5.965705871582031, 10.20698356628418, -11.046989440917969, 160.92962646484375, 131.08982849121094, 287.709716796875, 70.00411224365234, 24.502281188964844, 78.21319580078125, 32.530372619628906, 22.02750015258789, 33.17787170410156, -99.46448516845703, 46.27086639404297, 66.30561065673828, 0.20172882080078125, 103.50736999511719, 85.37005615234375, 71.59700012207031, 128.36134338378906, 75.87724304199219, 7.068794250488281, 52.16618347167969, 200.0432891845703, 170.0413818359375, 32.320098876953125, -8.757238388061523, 15.075326919555664], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000187.npy"}
|
|
{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 81.50593566894531, "std": 94.33646392822266, "min": -116.11408233642578, "p10": -8.361625480651856, "median": 55.921634674072266, "p90": 220.9640121459961, "max": 370.013916015625, "pos_frac": 0.84375, "sample": [238.47073364257812, 93.7437515258789, 194.5027313232422, 39.97688674926758, 70.86837005615234, 12.425537109375, -24.582260131835938, -65.91374969482422, 225.66432189941406, 83.36186981201172, 49.8394775390625, -15.856590270996094, 91.64657592773438, 73.26549530029297, 221.02052307128906, 193.4857940673828, 70.7953109741211, -8.285659790039062, 50.82490539550781, 6.529546737670898, 16.276657104492188, 18.196861267089844, 60.748260498046875, 118.03845977783203, 41.74097442626953, 37.408546447753906, 25.75524139404297, 3.392589569091797, 268.92755126953125, 26.748504638671875, 313.36993408203125, -116.11408233642578, 220.8321533203125, 100.56019592285156, 137.858154296875, 97.34829711914062, 28.433868408203125, 173.8885498046875, -7.758340835571289, 166.95718383789062, 113.91877746582031, 39.780517578125, -67.82989501953125, -7.868535995483398, 162.61390686035156, 37.33428955078125, 191.7652130126953, 23.774341583251953, 28.912277221679688, 260.65618896484375, 55.330543518066406, 139.42864990234375, 50.35248565673828, -10.104339599609375, 131.5734405517578, 59.27989196777344, 91.97270202636719, 370.013916015625, 84.86865234375, 52.19695281982422, 56.512725830078125, -8.394182205200195, 10.791511535644531, 15.106382369995117], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000188.npy"}
|
|
{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 68.28736114501953, "std": 103.54136657714844, "min": -145.45263671875, "p10": -40.26057510375976, "median": 46.88081169128418, "p90": 169.6129898071289, "max": 441.35888671875, "pos_frac": 0.796875, "sample": [31.160568237304688, 152.0130615234375, -80.29890441894531, 261.25201416015625, -31.36125946044922, -44.231475830078125, 39.42926788330078, 3.5182723999023438, -12.546586990356445, 53.689300537109375, 131.80384826660156, 130.98928833007812, 115.76597595214844, 26.822677612304688, 139.78497314453125, 47.69380187988281, 127.80655670166016, 33.110164642333984, -95.43341064453125, 8.02285385131836, -60.96250915527344, 41.741119384765625, 72.98985290527344, 172.72967529296875, 131.74949645996094, 53.49217224121094, -37.66322326660156, 46.06782150268555, 28.036602020263672, 160.87210083007812, 8.479312896728516, 24.70423698425293, 91.76190185546875, 371.0859375, 29.76727294921875, -4.5885009765625, 15.204998016357422, 200.97474670410156, 72.72720336914062, 22.11798858642578, -125.36250305175781, 100.4438705444336, -18.781143188476562, 75.40886688232422, 45.85932159423828, 288.7723388671875, 168.8033905029297, 441.35888671875, -8.990402221679688, 103.08247375488281, 156.2411651611328, 26.787878036499023, 63.9135627746582, 123.85588073730469, 79.73009490966797, 141.86788940429688, 169.9599609375, 75.75791931152344, 9.718574523925781, -145.45263671875, 21.881362915039062, -41.37372589111328, 12.445816040039062, 124.18316650390625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000189.npy"}
|
|
{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 78.49307250976562, "std": 108.61783599853516, "min": -155.85366821289062, "p10": -38.077988433837874, "median": 64.44353103637695, "p90": 228.26604156494142, "max": 342.05902099609375, "pos_frac": 0.796875, "sample": [-3.0611419677734375, 101.58148193359375, 133.1403350830078, 91.07914733886719, 19.276992797851562, -62.00715637207031, 187.52731323242188, 36.757606506347656, 9.837690353393555, 250.2760009765625, 67.87722778320312, 52.75813293457031, 102.10810852050781, 31.294570922851562, 316.457275390625, 102.84939575195312, 79.07691955566406, 86.67002868652344, 165.51327514648438, 39.142333984375, 2.076730728149414, 225.14169311523438, -155.85366821289062, 61.00983428955078, 227.18917846679688, -8.851211547851562, 43.51409912109375, 250.7084503173828, 163.0684814453125, 68.59581756591797, -13.447952270507812, 228.72755432128906, 13.432289123535156, -5.312952041625977, 100.6246109008789, 82.99404907226562, -60.52893829345703, -24.151031494140625, -132.40975952148438, 24.458602905273438, 108.21416473388672, -44.04668426513672, 100.03199005126953, 31.563766479492188, -23.32847023010254, -63.28497314453125, 97.26435852050781, 215.48541259765625, 10.352935791015625, 52.015380859375, -146.2382354736328, 299.34564208984375, 87.32257843017578, 30.66324806213379, 174.85995483398438, 108.31412506103516, 53.24348449707031, 342.05902099609375, 2.946531295776367, 310.3677062988281, 116.80104064941406, 191.8838653564453, 29.836727142333984, 38.741180419921875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000190.npy"}
|
|
{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 114.8416748046875, "std": 120.66770935058594, "min": -93.97439575195312, "p10": 3.6465276718139656, "median": 95.29716110229492, "p90": 283.367855834961, "max": 459.3980712890625, "pos_frac": 0.921875, "sample": [338.8761901855469, 80.45596313476562, 456.6465759277344, 23.1988525390625, 222.84585571289062, 76.40498352050781, 130.08644104003906, 233.8504638671875, 7.899702072143555, 299.84075927734375, 89.06852722167969, 119.93550872802734, 19.965805053710938, 211.884033203125, 236.92034912109375, 24.16736602783203, 33.04737091064453, 68.92298126220703, 27.324996948242188, 70.15682220458984, 200.93174743652344, 53.248291015625, 36.65618896484375, 228.77236938476562, 103.3703384399414, 19.640701293945312, 4.257181167602539, 459.3980712890625, 106.45881652832031, 3.3848190307617188, 176.08633422851562, -62.524784088134766, 16.16619873046875, -29.53396224975586, 99.52993774414062, 144.82269287109375, 122.03512573242188, 126.47526550292969, 290.94976806640625, 26.798595428466797, 2.3760757446289062, 187.54129028320312, 193.45870971679688, -93.97439575195312, 38.68354797363281, 448.8027648925781, 49.27776336669922, -42.53456115722656, 96.88165283203125, 110.57304382324219, 142.2650909423828, 265.6767272949219, 36.39032745361328, 8.793872833251953, 109.631103515625, 123.6414794921875, -20.45362091064453, 123.76434326171875, 5.6494140625, 93.7126693725586, 316.83551025390625, 54.18170928955078, 73.19612884521484, 127.07291412353516], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000191.npy"}
|
|
{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 76.80948638916016, "std": 110.86383056640625, "min": -171.35293579101562, "p10": -35.96512832641601, "median": 60.141822814941406, "p90": 248.44160919189454, "max": 399.74298095703125, "pos_frac": 0.765625, "sample": [-19.73511505126953, -91.80787658691406, 49.52062225341797, 134.5430450439453, 129.60081481933594, -3.1508712768554688, -38.30956268310547, -6.307535171508789, 28.275421142578125, -171.35293579101562, 17.138717651367188, 74.47471618652344, -30.047210693359375, 166.5481719970703, 21.234725952148438, 30.457412719726562, -30.494781494140625, 81.71565246582031, -20.490325927734375, 133.87564086914062, -81.3175277709961, 318.7145080566406, -51.76781463623047, 69.56493377685547, -3.7326507568359375, 267.59808349609375, 285.63824462890625, 322.6892395019531, 91.96087646484375, 264.05596923828125, 72.38526916503906, 71.77944946289062, 399.74298095703125, 90.46923828125, 143.13095092773438, 14.676328659057617, 107.28431701660156, 182.1396942138672, 199.73068237304688, 37.71884536743164, 49.75898742675781, 246.38624572753906, 10.803163528442383, 33.905792236328125, 115.06478881835938, 64.0597152709961, 56.22393035888672, -114.48677062988281, 144.2292938232422, 45.00305938720703, 76.47676086425781, 5.38812255859375, 13.223915100097656, -49.438209533691406, 11.592632293701172, 192.53378295898438, 71.29595947265625, -6.01507568359375, 11.041061401367188, 164.18136596679688, 119.18590545654297, 106.63964080810547, 41.28047180175781, 249.32247924804688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000192.npy"}
|
|
{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 45.274635314941406, "std": 103.48821258544922, "min": -309.3634033203125, "p10": -52.50378570556641, "median": 47.64688491821289, "p90": 156.13507537841807, "max": 302.02630615234375, "pos_frac": 0.6875, "sample": [96.34574890136719, -21.051956176757812, 32.68608856201172, 129.39859008789062, -286.43255615234375, 98.93049621582031, 63.49750518798828, 52.15379333496094, -309.3634033203125, 233.13668823242188, 65.0822982788086, -20.19391441345215, 131.37857055664062, -13.533580780029297, 8.901046752929688, 76.04925537109375, 81.56098937988281, 90.04347229003906, -3.0685043334960938, 131.7296142578125, 3.0650711059570312, -3.4715499877929688, 25.22134017944336, 96.88355255126953, 22.8359317779541, 102.31642150878906, -38.09183883666992, 13.135185241699219, -0.274749755859375, -32.543731689453125, -24.289918899536133, 166.5945587158203, -48.28099060058594, -53.375404357910156, 173.93142700195312, -91.61469268798828, 129.37713623046875, -50.470008850097656, 10.799020767211914, 56.068443298339844, 7.9007720947265625, 101.36681365966797, 186.3270263671875, 115.74898529052734, 90.67955017089844, -113.90432739257812, 257.5762939453125, 85.6944808959961, 94.63792419433594, 105.2083511352539, -53.849876403808594, 76.6765365600586, 15.577960968017578, -38.249176025390625, -2.048248291015625, 121.37462615966797, 302.02630615234375, 110.57502746582031, -54.134525299072266, 22.29035186767578, 214.7568359375, 43.139976501464844, 24.240249633789062, 88.89922332763672], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000193.npy"}
|
|
{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 73.66490173339844, "std": 77.32048797607422, "min": -54.53433609008789, "p10": -27.155361938476556, "median": 59.30291938781738, "p90": 198.7749221801758, "max": 235.94017028808594, "pos_frac": 0.828125, "sample": [115.96422576904297, 3.2216949462890625, 26.96095848083496, 24.83759307861328, 85.77632904052734, 173.96844482421875, 71.4768295288086, 139.6827850341797, -7.6341705322265625, 6.551883697509766, 26.296770095825195, 210.45846557617188, -38.93284606933594, -19.376983642578125, 41.55767059326172, 35.09584045410156, 162.80050659179688, 37.98579406738281, 59.00626754760742, 117.44086456298828, 29.745792388916016, 72.4607925415039, 10.18985366821289, 4.467525482177734, 96.63469696044922, -54.53433609008789, -1.5400333404541016, 211.78729248046875, 59.541969299316406, 81.85107421875, 214.7514190673828, 117.02534484863281, 199.83079528808594, 137.75741577148438, 22.443565368652344, 235.94017028808594, 59.06386947631836, 24.457738876342773, 115.73284912109375, 45.49716567993164, 159.2182159423828, -39.330780029296875, 29.65410804748535, -4.961311340332031, 95.87800598144531, 146.97308349609375, 6.595760345458984, -46.51664733886719, 33.88646697998047, -30.48895263671875, 96.64077758789062, -39.442649841308594, 53.437644958496094, 196.31121826171875, 56.42049789428711, 232.7147216796875, 111.24848937988281, 74.32318115234375, 216.39842224121094, 86.34976959228516, 145.40805053710938, 127.02579498291016, 81.62643432617188, -31.06079864501953], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000194.npy"}
|
|
{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 78.51309204101562, "std": 89.32923126220703, "min": -84.08810424804688, "p10": -34.91079521179198, "median": 68.58395385742188, "p90": 189.19113922119143, "max": 331.5515441894531, "pos_frac": 0.828125, "sample": [73.25545501708984, 15.887542724609375, 194.80982971191406, 45.7839469909668, 68.60964965820312, 322.76800537109375, 43.710487365722656, 254.9925079345703, 147.8712158203125, -7.837545394897461, 1.3080596923828125, 46.937255859375, 11.402214050292969, -84.08810424804688, 174.73223876953125, 89.70841979980469, 68.55825805664062, 192.2648162841797, -41.92176055908203, 276.96380615234375, -21.844982147216797, 98.71832275390625, -77.2080078125, 22.81939697265625, 104.28433227539062, 89.96430206298828, 162.501220703125, 208.3749237060547, 116.29933166503906, 133.0736846923828, 106.93073272705078, -55.77794647216797, 67.04973602294922, 151.04176330566406, 27.140853881835938, 116.19071960449219, -10.35120964050293, 9.185768127441406, 132.417236328125, 165.8329315185547, -40.676048278808594, 182.01922607421875, 43.494205474853516, 44.92902374267578, 331.5515441894531, 117.6971206665039, 70.05752563476562, 62.9017333984375, -45.90135955810547, 24.360517501831055, 111.07444763183594, -14.369857788085938, 155.06634521484375, 89.97283935546875, 19.909164428710938, 47.1690673828125, 25.46874237060547, 64.43879699707031, 20.98053550720215, 80.98888397216797, -40.51042938232422, 38.115848541259766, 114.60044860839844, 79.14015197753906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000195.npy"}
|
|
{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 88.7871322631836, "std": 107.77754211425781, "min": -139.64212036132812, "p10": -28.209057235717765, "median": 61.28285217285156, "p90": 237.24114532470705, "max": 341.61065673828125, "pos_frac": 0.78125, "sample": [179.72531127929688, 179.1074981689453, 207.9249267578125, 171.92465209960938, -39.40142059326172, 229.7703094482422, 228.74610900878906, 320.803466796875, 187.7728271484375, 56.29829406738281, 15.451519012451172, 45.984771728515625, 217.3994140625, 185.6685791015625, 46.35899353027344, 16.158876419067383, -20.026325225830078, 61.50715637207031, -1.9646530151367188, 55.062896728515625, 341.61065673828125, -15.96982192993164, 54.92195129394531, 27.494266510009766, 106.08659362792969, 90.3576431274414, 105.29142761230469, 125.02982330322266, 34.20623779296875, -3.0388355255126953, 123.50148010253906, 65.50727844238281, 325.462890625, 333.774658203125, -17.748016357421875, 126.19569396972656, 72.48884582519531, 55.96104431152344, 77.85871887207031, 58.88153839111328, 55.19794464111328, 63.89019012451172, 211.15029907226562, -1.2794609069824219, 111.4942398071289, 34.3597297668457, 55.73668670654297, -96.75148010253906, 55.307037353515625, -139.64212036132812, -55.17431640625, 125.30986785888672, 72.23745727539062, -18.081195831298828, 241.0148162841797, -43.42949676513672, -83.03485107421875, 61.05854797363281, 19.143173217773438, 119.71866607666016, 240.44293212890625, 247.33119201660156, -31.7159423828125, 5.945518493652344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000196.npy"}
|
|
{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 69.5676040649414, "std": 84.98685455322266, "min": -90.77845764160156, "p10": -18.69024124145506, "median": 54.486385345458984, "p90": 168.74222259521486, "max": 301.1405029296875, "pos_frac": 0.875, "sample": [45.32843017578125, 282.5889587402344, 107.86946105957031, 8.236541748046875, 125.79554748535156, 6.131828308105469, 29.138900756835938, -41.76656723022461, 53.83039093017578, 293.16412353515625, 301.1405029296875, 167.5775604248047, 57.99431610107422, 86.64990234375, -65.03571319580078, 0.3900337219238281, 81.16780853271484, 232.44068908691406, 39.918212890625, 59.55051803588867, 96.30046844482422, 160.71070861816406, 46.68171310424805, 7.081573486328125, 56.36370086669922, 8.782739639282227, -68.7109375, 90.95783996582031, 94.65636444091797, 90.11573028564453, 214.96063232421875, 108.80430603027344, 30.163339614868164, 166.30429077148438, 0.719390869140625, 142.2843017578125, 2.82550048828125, 3.3735198974609375, -26.303924560546875, -90.77845764160156, 27.024782180786133, 106.1321029663086, -0.9249801635742188, 22.182334899902344, 13.604528427124023, 55.14237976074219, 212.66453552246094, 143.7283935546875, 42.329681396484375, 162.79791259765625, 0.28581809997558594, 49.76091384887695, 72.25101470947266, 24.623146057128906, 6.354087829589844, 92.52568817138672, 127.37461853027344, 78.14979553222656, 169.24136352539062, -30.155685424804688, -37.19256591796875, 28.948768615722656, 16.282257080078125, 61.79142761230469], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000197.npy"}
|
|
{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 85.09733581542969, "std": 109.61522674560547, "min": -127.19552612304688, "p10": -7.816974830627441, "median": 71.33218765258789, "p90": 178.26461791992188, "max": 477.0710144042969, "pos_frac": 0.859375, "sample": [-103.00586700439453, 48.93257141113281, -8.226537704467773, 414.2876281738281, 100.64686584472656, 168.5145263671875, 235.75039672851562, 42.70277786254883, 97.61463928222656, 58.616371154785156, 119.73210144042969, 112.51394653320312, -92.99539184570312, 35.20165252685547, 16.2802734375, 167.75439453125, 115.25792694091797, 91.341064453125, 76.13592529296875, 76.28141021728516, -9.339170455932617, -6.861328125, 33.03386688232422, 83.86366271972656, 10.332120895385742, 51.81275177001953, -46.781532287597656, 179.64678955078125, 87.79396057128906, 96.46839904785156, 84.84709167480469, 72.84654998779297, 69.81782531738281, 44.64373779296875, 92.66439819335938, 5.456295013427734, 54.24856948852539, 135.004150390625, 106.04043579101562, 175.03955078125, -127.19552612304688, 417.05706787109375, 56.08538055419922, 56.83142852783203, -1.5909271240234375, 13.232406616210938, 213.99017333984375, -24.503297805786133, 5.447154998779297, 59.661949157714844, 19.421749114990234, 65.186279296875, 72.89654541015625, 114.16207885742188, 8.023199081420898, 138.94540405273438, 119.28285217285156, 159.33154296875, 7.642498016357422, 340.1973876953125, 3.555023193359375, 477.0710144042969, 44.69196319580078, 112.89125061035156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000198.npy"}
|
|
{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 80.05874633789062, "std": 99.43260192871094, "min": -126.21112060546875, "p10": -33.65652599334716, "median": 66.8620491027832, "p90": 209.9941452026368, "max": 295.7828369140625, "pos_frac": 0.796875, "sample": [164.0804443359375, 137.97222900390625, -80.51174926757812, -3.362060546875, 191.1065673828125, 63.214271545410156, -37.304771423339844, 166.76280212402344, -113.46088409423828, 102.27880096435547, 15.472053527832031, -42.24211120605469, -102.47030639648438, 155.80426025390625, 180.88507080078125, 71.70059967041016, 10.624855041503906, 109.93526458740234, 276.52490234375, 38.173797607421875, 56.572998046875, 145.88034057617188, 270.14349365234375, 218.0888214111328, -68.12336730957031, 27.990570068359375, -2.6591262817382812, 80.41413116455078, 173.69313049316406, 34.74169921875, 27.76055908203125, 190.981689453125, 295.7828369140625, 64.3028564453125, 42.765037536621094, 181.31607055664062, -23.104171752929688, 91.8126220703125, 20.565597534179688, 91.11964416503906, 33.29876708984375, 164.51657104492188, 117.8732681274414, 22.559497833251953, 228.21578979492188, 73.37256622314453, 44.91498565673828, -25.143953323364258, -20.666019439697266, -126.21112060546875, 188.04782104492188, 58.766563415527344, 69.4212417602539, 87.47572326660156, 254.7698516845703, 109.05458068847656, 7.499086380004883, 79.1766586303711, 25.434585571289062, 42.360984802246094, 41.22724914550781, 139.8721923828125, 292.989990234375, -10.2965087890625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000199.npy"}
|
|
{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 84.66621398925781, "std": 111.02828216552734, "min": -176.85302734375, "p10": -25.241154479980462, "median": 59.669790267944336, "p90": 236.16460266113293, "max": 433.1292724609375, "pos_frac": 0.8125, "sample": [200.69369506835938, 117.40788269042969, 58.982540130615234, 247.48904418945312, 46.59794998168945, 120.68151092529297, 11.533561706542969, 0.32469940185546875, 315.85345458984375, -65.26579284667969, 261.9570007324219, 188.181396484375, 92.5390853881836, -9.480167388916016, 56.983646392822266, 209.74090576171875, 191.75555419921875, 267.22442626953125, 23.289581298828125, 204.02630615234375, 57.630149841308594, 13.23237419128418, 139.88230895996094, 155.41224670410156, -15.337093353271484, 70.4889144897461, 129.16390991210938, -86.37649536132812, 433.1292724609375, 49.43163299560547, 94.8365478515625, -19.625656127929688, 1.607452392578125, 47.049163818359375, 44.371337890625, -3.926237106323242, -176.85302734375, 23.520050048828125, 96.54290771484375, 299.1105041503906, 10.927099227905273, 84.22708129882812, 42.109405517578125, 27.62860870361328, 3.732616424560547, -89.11279296875, 72.7929916381836, -32.312015533447266, 208.29470825195312, -18.836599349975586, 22.2188720703125, 288.8258056640625, 46.617828369140625, 35.128639221191406, 60.35704040527344, 196.8795928955078, 151.3685760498047, 112.94568634033203, 62.258514404296875, 115.18128967285156, -86.65411376953125, 124.36367797851562, -27.647796630859375, 113.5365982055664], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000200.npy"}
|
|
{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 85.23928833007812, "std": 104.50066375732422, "min": -151.52935791015625, "p10": -25.06190948486328, "median": 69.45576477050781, "p90": 205.19166107177736, "max": 359.8392333984375, "pos_frac": 0.859375, "sample": [125.73634338378906, 33.723358154296875, 161.2064971923828, -148.8603515625, 40.884368896484375, -22.598526000976562, 14.629049301147461, 13.213125228881836, -11.415863037109375, 52.833160400390625, 0.649627685546875, 31.390533447265625, 25.239013671875, 12.264846801757812, 326.2349548339844, 94.54911041259766, 43.228546142578125, 90.45863342285156, 25.89740753173828, 207.30841064453125, 296.8096008300781, 138.57464599609375, 24.579254150390625, 67.90338134765625, 146.09915161132812, 259.1412353515625, -58.91194534301758, 149.9211883544922, 146.88287353515625, 12.160745620727539, 63.6691780090332, 110.32389831542969, 123.0600814819336, 115.45401000976562, -26.117645263671875, -39.08926010131836, 173.9974365234375, 49.66942596435547, 46.13978576660156, 359.8392333984375, 84.3785400390625, 8.497337341308594, 114.82950592041016, 57.85174560546875, 43.995506286621094, 148.33287048339844, 93.9188003540039, 194.1210479736328, 290.0356140136719, 71.00814819335938, 87.2343521118164, -44.682472229003906, 116.50764465332031, 77.09998321533203, -151.52935791015625, 200.25257873535156, 352.4828186035156, 117.3590316772461, 28.669260025024414, -57.40849304199219, 161.42453002929688, 30.96392059326172, 21.324859619140625, 131.96832275390625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000201.npy"}
|
|
{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 57.55382537841797, "std": 95.78604888916016, "min": -142.0554656982422, "p10": -60.20078048706055, "median": 58.30447769165039, "p90": 203.90608825683594, "max": 290.03912353515625, "pos_frac": 0.71875, "sample": [290.03912353515625, -36.99003219604492, 94.73969268798828, 206.52040100097656, 2.854654312133789, 0.8685989379882812, 130.38201904296875, -31.548574447631836, 103.14900207519531, 83.99223327636719, 40.06427001953125, -111.93159484863281, 118.28730010986328, 199.64349365234375, 116.5833969116211, 74.64513397216797, -132.53582763671875, 29.411224365234375, 85.17536926269531, 242.4770050048828, 72.65581512451172, 206.3782501220703, -75.27163696289062, 154.48727416992188, 14.864402770996094, 5.416999816894531, 271.516357421875, -6.314653396606445, 85.26732635498047, 113.19281005859375, 78.82807922363281, 126.43310546875, 206.29864501953125, -25.19428253173828, -15.96600341796875, -61.00532531738281, 58.94476318359375, 52.00958251953125, 41.62628173828125, 156.10577392578125, 92.10177612304688, -31.848861694335938, 17.440872192382812, 203.84603881835938, 106.44926452636719, 104.89253234863281, 75.8808822631836, -142.0554656982422, 65.79730224609375, 48.753753662109375, 12.698657989501953, 31.667287826538086, -22.264389038085938, -50.00963592529297, 24.326644897460938, -67.23757934570312, 65.62283325195312, -58.323509216308594, -36.8438606262207, 57.66419219970703, 203.93182373046875, -66.45133972167969, 87.00785064697266, -5.702457427978516], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000202.npy"}
|
|
{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 70.53004455566406, "std": 80.90208435058594, "min": -89.76900482177734, "p10": -35.248274612426755, "median": 59.30097961425781, "p90": 175.95536956787112, "max": 300.8555908203125, "pos_frac": 0.828125, "sample": [41.19801712036133, 106.92132568359375, 155.765625, 22.293861389160156, -14.113147735595703, 220.48849487304688, -43.33315658569336, 81.32867431640625, 23.902938842773438, 83.14984130859375, 8.669746398925781, 91.42859649658203, 97.43396759033203, 90.71073150634766, -69.54219055175781, 102.79075622558594, 60.06913757324219, -4.846923828125, 146.4962158203125, 101.19525146484375, 235.76461791992188, 247.92791748046875, -33.57731246948242, 58.35390853881836, 86.82784271240234, 36.86924743652344, 300.8555908203125, 18.101821899414062, -50.705055236816406, 93.95841979980469, 16.907005310058594, 130.47811889648438, 119.22731018066406, 203.65298461914062, 49.634796142578125, 148.36141967773438, 81.29344940185547, 94.98286437988281, 38.967262268066406, 66.30394744873047, 15.725967407226562, 79.53148651123047, 56.912498474121094, 134.41415405273438, -8.536849975585938, -89.76900482177734, -35.96440124511719, 54.939395904541016, -45.60588073730469, 18.64032745361328, 149.17446899414062, 29.987525939941406, 37.41569900512695, 40.61705780029297, 23.183868408203125, -44.49029541015625, 113.92107391357422, 58.53282165527344, 172.1358642578125, 227.7845458984375, 10.060811996459961, 177.59230041503906, 64.75458526611328, 26.770751953125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000203.npy"}
|
|
{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 63.52558898925781, "std": 96.5376968383789, "min": -110.81382751464844, "p10": -33.064022064208984, "median": 42.98810005187988, "p90": 152.3451766967774, "max": 351.02996826171875, "pos_frac": 0.734375, "sample": [139.60121154785156, 42.20833206176758, 25.83620834350586, 39.04376220703125, 100.58680725097656, 23.276016235351562, 127.41455078125, -67.22750091552734, 82.05143737792969, 32.143516540527344, -31.11648178100586, 28.670774459838867, 10.340530395507812, 72.43097686767578, 55.53875732421875, -23.25079345703125, -79.6380615234375, 115.5592269897461, 37.97593688964844, 17.013505935668945, 136.22459411621094, -58.76325225830078, -4.071868896484375, -19.520427703857422, 32.85491943359375, -10.246719360351562, 59.66432189941406, 109.39926147460938, 157.43576049804688, -37.45097351074219, 337.76312255859375, 13.465538024902344, -3.96087646484375, 26.353574752807617, -18.063705444335938, 138.4123992919922, 43.76786804199219, 75.8921890258789, 323.41937255859375, -110.81382751464844, 140.46714782714844, 139.30538940429688, 103.07826232910156, 73.10894775390625, 64.65072631835938, 1.892202377319336, 36.524559020996094, 132.20567321777344, 201.13714599609375, -21.6915283203125, 59.08460998535156, 126.83116912841797, -69.61956787109375, 61.72107696533203, 351.02996826171875, 239.3271484375, -22.493894577026367, 105.1299819946289, 25.50255012512207, 283.3118896484375, -31.580062866210938, 66.68589782714844, 93.50827026367188, -33.70000457763672], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000204.npy"}
|
|
{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 84.29817962646484, "std": 102.43138885498047, "min": -156.152587890625, "p10": -43.12863845825195, "median": 82.2041015625, "p90": 213.64303588867188, "max": 301.94915771484375, "pos_frac": 0.78125, "sample": [19.465557098388672, 66.48876953125, 137.09292602539062, 301.94915771484375, 54.61949920654297, 29.21586036682129, -3.2690258026123047, 6.3272705078125, 37.638084411621094, 179.239501953125, 211.9856414794922, -100.1373291015625, -38.92784118652344, 88.65241241455078, -10.252174377441406, 59.361846923828125, 12.681129455566406, 267.0257263183594, 214.3533477783203, 29.707683563232422, 104.33197784423828, 62.75958251953125, -65.3056640625, 130.8760528564453, 222.76345825195312, 195.88327026367188, 159.50625610351562, -18.67178726196289, -44.19263458251953, 159.913330078125, -40.64598083496094, 301.3552551269531, 70.7752685546875, 57.716339111328125, 27.61199188232422, 87.63949584960938, 261.0440979003906, 172.62408447265625, 76.76870727539062, 87.89585876464844, -156.152587890625, 102.50692749023438, 119.53917694091797, 152.8690643310547, 104.62423706054688, -46.24575424194336, 114.80320739746094, 65.7091064453125, 163.65057373046875, 126.97127532958984, 198.1481170654297, 185.2400360107422, 147.73057556152344, 68.00859832763672, 129.4630584716797, 255.54095458984375, 92.73490905761719, -16.60797882080078, 5.04364013671875, -140.23092651367188, -49.34477233886719, 162.2610321044922, -23.944564819335938, 58.898712158203125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000205.npy"}
|
|
{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 91.27373504638672, "std": 96.12128448486328, "min": -132.91354370117188, "p10": -25.103776550292963, "median": 77.88680648803711, "p90": 210.2998748779297, "max": 346.88134765625, "pos_frac": 0.84375, "sample": [75.23301696777344, 138.11883544921875, 65.04052734375, 92.7703857421875, 346.88134765625, 238.47116088867188, 73.28541564941406, 30.744596481323242, 162.6089324951172, 192.71864318847656, 51.443634033203125, 66.66740417480469, 23.409639358520508, 64.80583953857422, -132.91354370117188, 83.28065490722656, 74.19476318359375, 63.03627014160156, 49.02015686035156, -17.042142868041992, 151.3932647705078, -27.111244201660156, 190.27627563476562, 284.34796142578125, 70.39662170410156, 134.33834838867188, 34.08665466308594, 174.74972534179688, -101.58247375488281, -80.70201873779297, 156.3683624267578, 211.2166290283203, 165.17807006835938, 155.5713653564453, 30.211259841918945, 188.9454345703125, 30.81291961669922, -54.80668258666992, 161.29336547851562, -87.28236389160156, 33.34430694580078, 155.3629150390625, 29.357501983642578, 116.91769409179688, 83.66529083251953, -11.057205200195312, 99.32312774658203, 72.73193359375, 140.03392028808594, 79.46360778808594, -40.400718688964844, -20.41968536376953, 76.31000518798828, 100.06255340576172, 270.38885498046875, 208.16078186035156, 2.5624523162841797, 110.653564453125, 113.21981811523438, 225.34304809570312, 47.28125, 21.939964294433594, 244.83831787109375, 152.9591064453125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000206.npy"}
|
|
{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 106.98884582519531, "std": 112.61688232421875, "min": -73.1293716430664, "p10": -5.596554946899405, "median": 73.25764465332031, "p90": 241.03501281738284, "max": 427.68316650390625, "pos_frac": 0.890625, "sample": [11.682674407958984, -73.1293716430664, 46.47772979736328, 54.81962966918945, 127.4608383178711, 42.895545959472656, 226.26182556152344, 6.476806640625, 7.5862274169921875, 14.35565185546875, 85.79217529296875, 64.52857208251953, 222.83230590820312, -9.400863647460938, 91.84444427490234, 126.83736419677734, 54.967002868652344, 131.9628143310547, 27.889541625976562, 30.66164207458496, 42.258174896240234, 129.33770751953125, 27.761085510253906, 203.08367919921875, 19.799514770507812, -36.907867431640625, 242.739990234375, 49.66484069824219, 38.53557586669922, -9.261707305908203, 237.05673217773438, 204.3753204345703, -31.235015869140625, 2.9554672241210938, 38.63734436035156, 209.7169189453125, 32.03089904785156, 177.0620574951172, 84.47833251953125, 125.48472595214844, 199.36962890625, 115.59979248046875, 74.90940856933594, 138.0564727783203, 188.45361328125, 192.57257080078125, 71.60588073730469, 311.1378173828125, 396.2443542480469, 195.2134246826172, 45.386573791503906, 337.783447265625, -25.41652488708496, 49.66432189941406, 427.68316650390625, 42.45819091796875, 76.49339294433594, 77.94818115234375, 67.84390258789062, 352.5814208984375, -68.94869232177734, 175.56655883789062, 321.4204406738281, 3.2824172973632812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000207.npy"}
|
|
{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 85.1832046508789, "std": 106.9832534790039, "min": -206.12615966796875, "p10": -27.725476074218747, "median": 69.86326599121094, "p90": 219.75741119384767, "max": 336.3763427734375, "pos_frac": 0.875, "sample": [-132.46292114257812, 102.69578552246094, -82.28610229492188, 162.82432556152344, 23.302536010742188, 1.2778434753417969, 262.09844970703125, 5.633039474487305, 95.71776580810547, 197.809326171875, 188.275146484375, 57.71532440185547, 75.18231201171875, 148.32302856445312, 163.73806762695312, 70.04817962646484, 336.3763427734375, 40.74913024902344, 315.1852111816406, 130.69137573242188, 24.695114135742188, 31.49645233154297, 175.03091430664062, 127.13289642333984, 70.59115600585938, 35.77680206298828, 36.42547607421875, 25.8865966796875, -76.91612243652344, 190.96405029296875, -101.13832092285156, 27.74860382080078, 37.65449905395508, 72.94049835205078, 105.00585174560547, 213.0037078857422, 190.65283203125, 44.02095413208008, 6.223426818847656, 159.26760864257812, 21.22760009765625, 42.4547119140625, 15.639556884765625, 222.65185546875, 69.67835235595703, 271.53515625, 80.49739837646484, -206.12615966796875, -29.711158752441406, 95.62704467773438, 8.27093505859375, 68.37989807128906, 253.5494384765625, 83.20494079589844, 53.88386535644531, -29.606170654296875, 23.28063201904297, 308.7945251464844, 157.72659301757812, 196.5007781982422, 138.5369873046875, 54.54317092895508, 15.164871215820312, -23.337188720703125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000208.npy"}
|
|
{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 75.00133514404297, "std": 101.9400634765625, "min": -195.39474487304688, "p10": -37.79809303283691, "median": 64.46062850952148, "p90": 226.11485137939454, "max": 295.1064758300781, "pos_frac": 0.734375, "sample": [249.8610382080078, 67.10890197753906, 135.48789978027344, -8.504753112792969, 287.32611083984375, 293.17877197265625, 242.3248291015625, 65.9090805053711, 99.13705444335938, 63.012176513671875, -32.54267501831055, 72.1661605834961, 136.9742431640625, 51.79863739013672, 145.4031219482422, 102.3177719116211, 152.03392028808594, -86.91841125488281, -13.4393310546875, 87.65972137451172, 169.87393188476562, 176.21548461914062, 4.715911865234375, -66.38432312011719, 50.422584533691406, -29.81902313232422, -26.456308364868164, 38.562904357910156, 51.35331726074219, 131.26939392089844, 87.41006469726562, 76.18488311767578, -88.13823699951172, -4.181339263916016, 49.820892333984375, 47.423065185546875, -59.98354721069336, -23.087459564208984, 295.1064758300781, 176.8258819580078, 35.89031219482422, 257.64617919921875, 15.409858703613281, -3.1386489868164062, -22.171401977539062, 32.136497497558594, 77.73363494873047, 54.737552642822266, 150.3218536376953, 103.6788330078125, 105.09759521484375, -11.456995010375977, -40.0504150390625, -195.39474487304688, 141.80181884765625, 56.11444091796875, 54.75215530395508, 50.25957489013672, 91.78863525390625, -86.65147399902344, 227.84046936035156, 222.08840942382812, 164.73318481445312, 149.4891357421875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000209.npy"}
|
|
{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 82.85260009765625, "std": 95.31683349609375, "min": -118.5609130859375, "p10": -11.898440551757812, "median": 59.420339584350586, "p90": 240.543685913086, "max": 310.3260498046875, "pos_frac": 0.828125, "sample": [-12.158706665039062, 33.62773895263672, 112.1130142211914, 66.22630310058594, 11.752944946289062, 16.30288314819336, 152.51876831054688, -7.681253433227539, 98.17752075195312, -27.84136199951172, 142.4293212890625, 0.16393280029296875, 34.891170501708984, 40.3758544921875, 223.02723693847656, 109.26045989990234, -11.291152954101562, 65.80670166015625, 101.07575225830078, 2.504352569580078, 203.3280487060547, 93.82337951660156, 38.87274169921875, 154.34620666503906, 141.5039520263672, 4.66685676574707, -1.746795654296875, 205.82095336914062, 149.96755981445312, 255.5756378173828, 77.99857330322266, 166.93138122558594, 106.97530364990234, 257.25531005859375, -49.55665588378906, -13.446395874023438, -41.48704147338867, 19.511947631835938, 43.005210876464844, 15.55710220336914, 128.52362060546875, 121.01144409179688, 248.0507354736328, 56.164920806884766, 11.087263107299805, -6.293792724609375, 259.6895446777344, 2.1414794921875, 94.02998352050781, 103.6394271850586, 212.27513122558594, 310.3260498046875, 194.88372802734375, 62.675758361816406, -13.590179443359375, 53.91413879394531, 44.24951934814453, 8.513980865478516, 14.986970901489258, 260.36956787109375, 0.7323532104492188, 6.582923889160156, -118.5609130859375, 266.97802734375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000210.npy"}
|
|
{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 71.38788604736328, "std": 103.82878875732422, "min": -121.89642333984375, "p10": -29.524095916748045, "median": 59.336395263671875, "p90": 186.97221527099614, "max": 383.9693298339844, "pos_frac": 0.765625, "sample": [74.638916015625, -19.928068161010742, 176.94036865234375, 72.42411041259766, 65.26153564453125, 105.42526245117188, 3.5319976806640625, 124.88636016845703, 359.0460205078125, -89.31884765625, 7.658668518066406, 42.08082580566406, 22.359134674072266, -29.257232666015625, 82.94561767578125, 167.59585571289062, 120.58552551269531, 102.94024658203125, 89.82473754882812, -9.207550048828125, -3.01312255859375, 5.0560302734375, 134.35137939453125, 80.9202651977539, -17.92206573486328, 4.2815093994140625, -26.94622802734375, 307.7560119628906, 33.24861526489258, 383.9693298339844, 308.26324462890625, -14.421821594238281, -67.30499267578125, 91.12278747558594, 175.15919494628906, 177.4586944580078, -66.73043823242188, 124.03553771972656, 33.505889892578125, 24.68878173828125, 88.72300720214844, 238.1428985595703, -29.638465881347656, 45.2657470703125, 18.440153121948242, 53.4112548828125, 48.090126037597656, -60.92442321777344, -13.426464080810547, 168.74368286132812, -92.22566223144531, 79.70509338378906, 144.11329650878906, 72.83111572265625, 113.1575927734375, 210.01016235351562, 67.03362274169922, 191.0494384765625, 3.5956268310546875, 107.67830657958984, 46.698604583740234, 23.853973388671875, 8.480392456054688, -121.89642333984375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000211.npy"}
|
|
{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 101.88738250732422, "std": 116.57473754882812, "min": -154.8925018310547, "p10": -25.420551681518553, "median": 105.73637390136719, "p90": 241.0304656982422, "max": 400.2375793457031, "pos_frac": 0.78125, "sample": [-154.8925018310547, -73.03455352783203, 266.7247009277344, 195.61965942382812, 63.04596710205078, 194.35336303710938, 59.48345947265625, -10.62588119506836, 15.247903823852539, 3.747983932495117, 164.7062225341797, 124.92255401611328, -98.09892272949219, -16.454193115234375, -0.8930854797363281, 17.547019958496094, 222.75067138671875, 203.47821044921875, 332.7691650390625, 182.09422302246094, 86.00546264648438, -90.00049591064453, 34.02659225463867, 73.76280212402344, 113.75885009765625, 124.90705108642578, 198.5798797607422, -38.39076614379883, 242.59133911132812, -4.933420181274414, 77.82305145263672, 131.15966796875, 400.2375793457031, 115.10600280761719, 29.90886116027832, -61.8038330078125, 227.0919647216797, -20.78912353515625, 115.77506256103516, 379.81402587890625, 200.12103271484375, 97.71389770507812, 68.06778717041016, 147.84176635742188, 18.68170166015625, 44.762542724609375, -25.669361114501953, 13.302715301513672, 206.38723754882812, 33.284793853759766, 114.50948333740234, 277.7279357910156, 267.37939453125, 167.94944763183594, 237.388427734375, -16.65665054321289, 144.68040466308594, 52.8011589050293, 143.2745361328125, 121.69050598144531, 37.1353759765625, 220.2125244140625, -24.839996337890625, 145.92303466796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000212.npy"}
|
|
{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 98.48548889160156, "std": 108.07766723632812, "min": -155.37738037109375, "p10": -26.81486358642578, "median": 91.16873168945312, "p90": 249.08860778808597, "max": 351.94464111328125, "pos_frac": 0.8125, "sample": [242.51681518554688, -27.873809814453125, 34.730308532714844, -68.41837310791016, 134.57212829589844, 15.049392700195312, 50.60076141357422, 103.73597717285156, -155.37738037109375, -57.870819091796875, 6.448661804199219, 148.82904052734375, 221.595703125, -21.524629592895508, 76.45594024658203, 129.94436645507812, 9.902936935424805, 54.77290344238281, 140.71676635742188, 217.03294372558594, 351.94464111328125, 212.45816040039062, 43.935890197753906, 293.0555114746094, 132.63851928710938, 219.61839294433594, 255.16571044921875, -23.05630874633789, 8.813652038574219, 31.027244567871094, 140.12371826171875, 84.7970962524414, 127.75933074951172, 97.54036712646484, -8.17364501953125, 68.0672607421875, 123.05689239501953, 221.42288208007812, 235.62762451171875, 130.5363311767578, 8.85770034790039, 114.85269165039062, 309.6067199707031, -27.706573486328125, 135.14222717285156, 49.044281005859375, 143.934326171875, 314.4356384277344, -48.123779296875, 168.9112548828125, 189.22640991210938, 257.84228515625, 251.90509033203125, 9.282154083251953, -24.734207153320312, -0.06441879272460938, -47.605674743652344, 59.9166259765625, 62.60443115234375, 39.491188049316406, 46.62580108642578, 26.008468627929688, 130.69764709472656, 130.72238159179688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000213.npy"}
|
|
{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 84.21907043457031, "std": 106.22203063964844, "min": -145.9224853515625, "p10": -26.660375404357904, "median": 59.91032028198242, "p90": 233.06005859375, "max": 382.6121826171875, "pos_frac": 0.78125, "sample": [41.23822784423828, 288.4991760253906, 4.213100433349609, 124.03326416015625, 13.225191116333008, -8.062919616699219, -31.84658432006836, -145.9224853515625, 209.62222290039062, -19.16091537475586, 128.5458984375, 51.632972717285156, 69.46421813964844, -1.0649585723876953, 42.06060791015625, 70.93380737304688, 145.66989135742188, -8.574014663696289, 25.19329833984375, 132.794189453125, 49.91773986816406, 42.04009246826172, 97.80928802490234, 178.13912963867188, 189.8896484375, 56.47791290283203, 39.331153869628906, -4.145391464233398, 81.75320434570312, -29.87442970275879, 55.9184684753418, 71.0812759399414, 14.909416198730469, 272.5777282714844, -76.35907745361328, 350.9297180175781, 240.42385864257812, 19.990798950195312, 16.420181274414062, 279.3542175292969, 142.2186737060547, 121.64994812011719, 206.8857879638672, 230.764892578125, -117.7928695678711, 382.6121826171875, 21.688196182250977, 129.21664428710938, 173.47802734375, 85.59589385986328, -14.282218933105469, 117.84971618652344, -30.7947998046875, 72.69772338867188, -9.363632202148438, 160.33364868164062, 40.30006790161133, 234.043701171875, 50.98091125488281, 63.34272766113281, 74.15672302246094, 183.7390594482422, -35.72651672363281, 27.34714126586914], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000214.npy"}
|
|
{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 88.59703063964844, "std": 96.82067108154297, "min": -111.5382080078125, "p10": -20.81867065429687, "median": 76.30872344970703, "p90": 212.51801605224617, "max": 378.843994140625, "pos_frac": 0.828125, "sample": [54.35667037963867, 236.8809814453125, 90.38140869140625, 134.69061279296875, -34.49363708496094, -49.86555480957031, -11.686515808105469, 114.2315444946289, 107.63648986816406, 69.60462188720703, 91.66615295410156, 33.34893035888672, 13.733682632446289, -8.650747299194336, 313.56011962890625, 165.24990844726562, 174.3732147216797, 220.76087951660156, 378.843994140625, 123.92532348632812, 59.24330139160156, 245.93572998046875, 97.61295318603516, -23.043777465820312, 45.73750305175781, 120.10454559326172, -38.817935943603516, 60.950950622558594, 168.9235382080078, 175.45098876953125, 33.52995300292969, 50.63136291503906, 53.923004150390625, 0.5826587677001953, 143.29653930664062, 67.52265167236328, -68.00995635986328, 134.84768676757812, 150.00955200195312, 184.02792358398438, 245.24847412109375, 84.06793975830078, 137.6363525390625, -9.935791015625, 136.29840087890625, 38.92230224609375, 64.12458038330078, -15.626754760742188, 37.23140335083008, 164.39210510253906, -111.5382080078125, 84.78649139404297, 334.44744873046875, 27.072769165039062, 62.496917724609375, 39.288330078125, 193.28466796875, 8.118698120117188, 105.49229431152344, 29.632139205932617, 84.96112060546875, 23.70445442199707, -57.91562271118164, 83.01282501220703], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000215.npy"}
|
|
{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 91.81981658935547, "std": 115.4380874633789, "min": -174.7678680419922, "p10": -44.31747970581054, "median": 67.91254043579102, "p90": 248.0164901733399, "max": 372.2532958984375, "pos_frac": 0.8125, "sample": [171.01983642578125, 255.20501708984375, -37.56543731689453, 123.77737426757812, 142.96783447265625, 41.45802307128906, -95.88694763183594, 120.89263153076172, 174.67572021484375, -18.545066833496094, 372.2532958984375, 42.90777587890625, 75.64430236816406, -47.211212158203125, 12.460552215576172, 2.143646240234375, -88.40727996826172, 187.93942260742188, 15.968639373779297, -132.036865234375, 25.83587646484375, 237.4420166015625, -4.813957214355469, 28.4754638671875, 117.64068603515625, 113.32905578613281, 33.000732421875, 143.0869598388672, 122.60882568359375, 20.971120834350586, 75.37957763671875, 139.97537231445312, 37.430328369140625, -22.713542938232422, 289.34075927734375, 218.90440368652344, -51.10319519042969, 238.200927734375, 32.020713806152344, 68.99614715576172, 31.921072006225586, 159.44461059570312, 26.697221755981445, -174.7678680419922, 66.82893371582031, 141.513671875, 200.08859252929688, 296.10662841796875, 35.73414611816406, -33.031227111816406, 217.41790771484375, 48.708984375, 59.841094970703125, 118.3479232788086, 32.822174072265625, 252.22315979003906, 192.31216430664062, 237.8834991455078, 293.17474365234375, 285.3843994140625, 23.662994384765625, -47.54234313964844, 25.679454803466797, 202.34710693359375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000216.npy"}
|
|
{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 105.62129211425781, "std": 104.15019989013672, "min": -106.57127380371094, "p10": -0.6350074768066318, "median": 81.65302658081055, "p90": 274.9229232788087, "max": 368.3238830566406, "pos_frac": 0.890625, "sample": [99.61825561523438, 107.12466430664062, 80.28925323486328, 75.69617462158203, 42.86266326904297, 39.281517028808594, 58.027801513671875, 163.73658752441406, 104.83721160888672, 57.16485595703125, 21.783370971679688, 284.739990234375, 131.0220947265625, 252.0164337158203, -78.91486358642578, 8.06634521484375, 35.147377014160156, -14.133544921875, 128.95220947265625, 366.7079772949219, 10.284259796142578, 61.982879638671875, 145.27957153320312, 286.9560241699219, 110.1399154663086, 166.63534545898438, -7.123863220214844, 45.23798751831055, -106.57127380371094, 79.14073181152344, 368.3238830566406, 158.9056396484375, 13.053564071655273, 47.74823760986328, 164.5086669921875, 22.903640747070312, 176.04095458984375, 216.81033325195312, 9.276763916015625, 314.324462890625, 291.21417236328125, -47.114097595214844, 250.0577850341797, 209.84109497070312, 116.28730773925781, 287.4784851074219, 45.78572082519531, 83.01679992675781, 65.40955352783203, 54.602882385253906, 90.33131408691406, 139.049072265625, 241.31097412109375, 23.107515335083008, 121.89999389648438, 55.20140838623047, -10.126472473144531, -4.364158630371094, 48.450843811035156, 31.187301635742188, 99.25424194335938, 148.36476135253906, 158.94651794433594, 12.685556411743164], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000217.npy"}
|
|
{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 84.22761535644531, "std": 106.92158508300781, "min": -113.52789306640625, "p10": -43.46033668518066, "median": 73.67354202270508, "p90": 220.13670501708987, "max": 419.502685546875, "pos_frac": 0.78125, "sample": [73.36920166015625, 46.24982452392578, 143.88568115234375, 111.76036071777344, 124.42632293701172, 21.613521575927734, 247.85775756835938, 33.083099365234375, 8.47780990600586, 75.1669692993164, 151.7978515625, 61.757049560546875, 0.6802902221679688, 140.12161254882812, 296.3985290527344, 95.98399353027344, 97.08607482910156, -16.636032104492188, 64.4305191040039, 143.63494873046875, 44.00213623046875, 51.58989715576172, 13.309886932373047, 156.26223754882812, -65.81744384765625, -113.52789306640625, 232.7508544921875, 88.82339477539062, 138.67465209960938, 142.64022827148438, 33.63020706176758, 262.20166015625, 215.22686767578125, 200.22540283203125, 141.67431640625, 63.408843994140625, -5.949621200561523, -21.030282974243164, 94.96971893310547, 14.81844711303711, -25.998016357421875, -60.461944580078125, 115.0294418334961, -45.2938117980957, -39.182228088378906, -38.32775115966797, 419.502685546875, 222.2409210205078, 167.4174041748047, -22.72077178955078, -57.8089599609375, 190.58445739746094, 353.5775451660156, 8.933649063110352, 39.38186264038086, 92.28718566894531, 131.74908447265625, 101.20259857177734, 19.367624282836914, -58.16534423828125, 73.9778823852539, -85.1309814453125, 210.80967712402344, 68.56658935546875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000218.npy"}
|
|
{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 73.72860717773438, "std": 110.99615478515625, "min": -241.95303344726562, "p10": -41.34141540527343, "median": 66.62575149536133, "p90": 229.84190368652347, "max": 355.9056396484375, "pos_frac": 0.828125, "sample": [314.265625, 120.03174591064453, 19.195730209350586, 232.135986328125, 14.943185806274414, 329.60693359375, -241.95303344726562, 8.793161392211914, 22.09320068359375, 179.97573852539062, 2.181612014770508, 74.79463195800781, 155.86985778808594, 290.01678466796875, 355.9056396484375, 96.01512908935547, 40.063133239746094, 239.94232177734375, 108.78834533691406, 125.2892837524414, 154.23117065429688, 23.313980102539062, -74.77484130859375, 34.25584411621094, 12.395790100097656, -34.577735900878906, 175.49441528320312, 2.1286392211914062, 224.48904418945312, 240.33465576171875, 65.59708404541016, 197.0386505126953, 23.887832641601562, 32.05354309082031, 28.665542602539062, 133.11404418945312, 94.96602630615234, 10.712661743164062, 78.04544067382812, 15.41314697265625, 207.57516479492188, 99.92637634277344, 96.28959655761719, -102.23309326171875, 55.33416748046875, 101.15139770507812, -91.0167007446289, 7.273183822631836, 22.79168128967285, 116.33512878417969, 29.080047607421875, -44.240135192871094, 3.9263381958007812, -31.283367156982422, -124.89161682128906, 107.95539093017578, 127.28173828125, 71.1114730834961, -22.086078643798828, -21.420028686523438, 75.50714111328125, 67.6544189453125, 87.93400573730469, -46.064979553222656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000219.npy"}
|
|
{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 64.49595642089844, "std": 80.94681549072266, "min": -135.55108642578125, "p10": -38.5650177001953, "median": 71.39629745483398, "p90": 155.2978485107422, "max": 252.39706420898438, "pos_frac": 0.8125, "sample": [172.51980590820312, 32.13946533203125, 68.75541687011719, -27.76396942138672, 51.21282196044922, 250.08932495117188, 231.97732543945312, 11.163837432861328, 118.67849731445312, 132.0131072998047, 103.13391876220703, 129.34356689453125, 115.83956146240234, 94.19637298583984, -135.55108642578125, 71.34286499023438, 157.0598907470703, 47.03535461425781, 121.7006607055664, 151.18641662597656, 83.36985778808594, 18.795711517333984, 14.71124267578125, -106.53060913085938, -43.19403839111328, 161.73538208007812, -10.776018142700195, 41.335418701171875, 168.798583984375, 49.17985916137695, -8.552947998046875, 111.89118957519531, 130.27577209472656, -17.932205200195312, 110.11558532714844, 84.68355560302734, 252.39706420898438, 146.2699432373047, 13.980079650878906, 87.83464813232422, 7.82280158996582, 30.897563934326172, 7.83531379699707, 73.93104553222656, -53.64234161376953, 71.4497299194336, 63.4456787109375, 103.12651062011719, -84.34382629394531, 79.90815734863281, 33.87849426269531, 26.308570861816406, 148.93682861328125, -26.62815284729004, 27.166217803955078, 127.90179443359375, 104.79501342773438, 6.546760559082031, 112.0106201171875, 92.66677856445312, 8.457645416259766, 113.62677764892578, -47.744293212890625, -85.07378387451172], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000220.npy"}
|
|
{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 128.43634033203125, "std": 113.93675231933594, "min": -97.99748229980469, "p10": -5.26657218933105, "median": 126.40237426757812, "p90": 284.6548126220703, "max": 394.9379577636719, "pos_frac": 0.875, "sample": [70.15850830078125, 62.56695556640625, 81.69206237792969, -0.9746742248535156, 75.01521301269531, 394.9379577636719, 18.74219512939453, 283.9266357421875, 175.6435546875, 221.82217407226562, 234.22003173828125, 7.039218902587891, 264.5206298828125, 119.10391235351562, 116.9353256225586, 190.60299682617188, 7.80596923828125, 294.0615539550781, -14.020998001098633, 112.62015533447266, 197.1727294921875, 139.13877868652344, 284.9668884277344, 343.0354309082031, 343.50390625, 194.52182006835938, 181.05674743652344, -91.05026245117188, 50.19355010986328, 47.251426696777344, 133.70083618164062, 163.20993041992188, -7.10595703125, 68.9651107788086, 46.64496612548828, 150.33352661132812, 80.24213409423828, -97.99748229980469, 165.15231323242188, 278.87908935546875, -32.51036834716797, 185.34466552734375, 206.8843536376953, 209.44558715820312, 94.18080139160156, 168.32275390625, 29.210556030273438, 308.34844970703125, 226.01318359375, 168.00448608398438, 5.410678863525391, -74.7156753540039, 48.448814392089844, 167.59991455078125, 31.435836791992188, -51.88685607910156, 54.10364532470703, 116.61785125732422, 52.47134780883789, 137.84481811523438, 252.76065063476562, 47.96319580078125, 317.2427978515625, 163.15003967285156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000221.npy"}
|
|
{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 100.5632095336914, "std": 110.70159912109375, "min": -124.70414733886719, "p10": -37.5057144165039, "median": 106.41378021240234, "p90": 257.99859008789065, "max": 368.3914794921875, "pos_frac": 0.828125, "sample": [-30.65076446533203, 298.1247253417969, 18.517837524414062, -83.126953125, 12.580848693847656, -40.44355010986328, -14.392059326171875, 69.08004760742188, 22.036163330078125, 145.08877563476562, 104.22020721435547, 112.39604949951172, 352.59295654296875, 62.8527946472168, 110.12566375732422, 104.13433074951172, -124.70414733886719, 132.887451171875, 192.2833251953125, 137.19725036621094, 135.69384765625, 97.19415283203125, -80.59182739257812, 259.546875, -77.99056243896484, 197.73121643066406, 363.2552490234375, 76.61918640136719, 59.21233367919922, 15.698860168457031, 108.60735321044922, 21.571807861328125, 164.9263916015625, -56.106048583984375, 157.3177032470703, 147.83045959472656, 35.24574279785156, 147.80471801757812, -5.557376861572266, 160.06512451171875, -3.2261276245117188, 119.43025207519531, 37.72602081298828, 45.18595886230469, 254.38592529296875, 176.55108642578125, 112.1847152709961, 176.64401245117188, 114.7552719116211, 41.307342529296875, 33.52494812011719, 152.20838928222656, 305.70684814453125, 53.76810073852539, 211.28427124023438, 110.0334701538086, 130.8068389892578, -90.87867736816406, 166.19509887695312, 28.71208953857422, 368.3914794921875, 44.406707763671875, 263.02691650390625, 75.0384750366211], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000222.npy"}
|
|
{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 86.6192398071289, "std": 132.87014770507812, "min": -257.7807922363281, "p10": -95.66823959350586, "median": 90.77481842041016, "p90": 244.75124511718752, "max": 436.37860107421875, "pos_frac": 0.75, "sample": [98.48991394042969, -65.5064926147461, 184.05545043945312, 105.89047241210938, 72.48822784423828, 269.1996154785156, 166.83755493164062, 88.76546478271484, 153.22779846191406, 118.4822998046875, -101.30352783203125, 27.675399780273438, 62.82603454589844, 214.0067138671875, 241.22230529785156, 236.31417846679688, 92.78417205810547, 69.03190612792969, 368.742431640625, -26.089191436767578, 142.28952026367188, -16.61610984802246, 99.36217498779297, -30.006793975830078, -25.39116668701172, 242.12103271484375, -96.72421264648438, 257.6706848144531, 88.07921600341797, -115.90802764892578, 15.497406005859375, -5.4139251708984375, 215.52711486816406, -221.5050048828125, 100.01109313964844, 119.23019409179688, -95.74467468261719, 137.62432861328125, 57.30671310424805, 436.37860107421875, 234.86474609375, -95.4898910522461, 119.10921478271484, 206.671630859375, -121.20552062988281, -8.474002838134766, 198.40267944335938, 257.2474060058594, 105.02388000488281, 8.713493347167969, 42.778175354003906, 29.0081787109375, 20.091156005859375, 313.74114990234375, 36.34967803955078, 31.020484924316406, -7.427190780639648, 14.280218124389648, 140.5536651611328, 130.30740356445312, 81.17900085449219, 137.85934448242188, -257.7807922363281, 245.87847900390625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000223.npy"}
|
|
{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 100.13922119140625, "std": 132.05262756347656, "min": -235.12481689453125, "p10": -35.12438430786132, "median": 78.81843185424805, "p90": 279.35844116210944, "max": 517.4376220703125, "pos_frac": 0.796875, "sample": [152.3875732421875, 5.1003875732421875, 125.52009582519531, 29.73804473876953, -58.071983337402344, 430.9042663574219, 197.71144104003906, 74.83734130859375, 97.3098373413086, -235.12481689453125, 40.725685119628906, 161.4359893798828, 140.58798217773438, 115.22744750976562, 303.893310546875, 78.50833129882812, 83.44869232177734, 189.02244567871094, 180.0911102294922, 517.4376220703125, 256.8816223144531, 79.12853240966797, 232.9420166015625, 196.64669799804688, -72.48014831542969, -50.912845611572266, 223.32907104492188, 66.65095520019531, 176.02249145507812, -37.136253356933594, 28.515487670898438, 35.68659973144531, 25.854066848754883, 373.1607360839844, 190.12417602539062, 181.60784912109375, 94.5168228149414, -29.842491149902344, 30.330976486206055, 33.69734573364258, 205.08294677734375, 19.30010986328125, 30.729515075683594, -25.913036346435547, 79.52069091796875, -64.90866088867188, -30.430023193359375, -8.836196899414062, -89.75621032714844, -12.561328887939453, 167.9306640625, 114.11474609375, 299.0848388671875, 18.766511917114258, 15.079734802246094, 43.72286605834961, 10.555389404296875, 244.77008056640625, 44.24595642089844, -9.5521240234375, 84.07870483398438, 289.9358825683594, 288.9913635253906, 29.542842864990234], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000224.npy"}
|
|
{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 121.92388153076172, "std": 136.9205780029297, "min": -153.71499633789062, "p10": -52.67169380187987, "median": 123.89436721801758, "p90": 311.4676422119141, "max": 499.0793762207031, "pos_frac": 0.75, "sample": [215.9815673828125, 117.53619384765625, 348.42626953125, 304.7852783203125, 37.880882263183594, 93.92664337158203, 47.86347961425781, -68.74383544921875, -57.33184814453125, 340.404052734375, 314.3315124511719, 197.1356201171875, -83.46925354003906, -34.16039276123047, 239.15345764160156, -6.209239959716797, -109.72016906738281, 12.681127548217773, -41.79800033569336, 36.02561950683594, 135.35150146484375, 130.82342529296875, 170.26019287109375, 141.3580322265625, -3.196260452270508, 118.61658477783203, 56.51972961425781, 137.0949249267578, -5.810298919677734, 224.37457275390625, 267.524658203125, 138.89755249023438, 171.0659942626953, 84.61263275146484, -12.289493560791016, 358.8641357421875, 353.05096435546875, -19.80915069580078, 233.36907958984375, 357.22283935546875, 80.39805603027344, 233.60646057128906, 127.97364807128906, -29.918115615844727, 123.70780944824219, 107.45146942138672, 499.0793762207031, 249.18853759765625, 147.05841064453125, 104.20504760742188, 261.67236328125, 18.45098876953125, 178.79795837402344, -58.433837890625, -80.89129638671875, 271.916748046875, 239.5707244873047, 133.78875732421875, 155.0714874267578, -153.71499633789062, 124.08092498779297, 115.04403686523438, -28.342689514160156, 40.765960693359375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000225.npy"}
|
|
{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 97.67030334472656, "std": 131.96148681640625, "min": -192.04440307617188, "p10": -24.04432296752928, "median": 67.26629257202148, "p90": 283.9673583984375, "max": 471.26531982421875, "pos_frac": 0.828125, "sample": [471.26531982421875, -91.79586791992188, -31.090999603271484, 44.74882507324219, -7.602077484130859, 145.98228454589844, 23.435653686523438, 68.95936584472656, 135.9046630859375, -4.789836883544922, 95.11699676513672, 33.88227844238281, 21.610000610351562, 68.9335708618164, -122.53033447265625, 68.36029052734375, -54.0696907043457, 255.15902709960938, 277.85064697265625, 66.17229461669922, 286.58880615234375, 294.9743347167969, 54.305015563964844, 106.22752380371094, 90.83271789550781, 195.49449157714844, 36.416343688964844, -5.1806182861328125, 78.43408203125, 354.8272705078125, 213.1410369873047, -50.06629943847656, 153.78077697753906, 68.64974212646484, 28.96055030822754, 428.49468994140625, 182.0509033203125, 445.50567626953125, 229.84408569335938, -4.624366760253906, 5.080299377441406, 79.89417266845703, -192.04440307617188, 100.95639038085938, 31.486141204833984, 65.94627380371094, 4.507820129394531, 31.01495361328125, 27.825586318969727, 65.69217681884766, 18.46807861328125, 235.17225646972656, 90.9962158203125, 157.9400634765625, 49.87639617919922, 105.43751525878906, -46.48236083984375, 7.227991104125977, 359.00372314453125, 50.43846893310547, 40.14624786376953, 100.96851348876953, 6.372222900390625, 200.81581115722656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000226.npy"}
|
|
{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 118.8543472290039, "std": 135.3520050048828, "min": -307.88690185546875, "p10": -18.56213626861571, "median": 103.11067581176758, "p90": 312.42778320312505, "max": 463.7841796875, "pos_frac": 0.859375, "sample": [241.00234985351562, 82.56631469726562, 124.91458892822266, 320.0325927734375, 171.09011840820312, 43.70819854736328, 71.58735656738281, 101.1601333618164, 226.80140686035156, -8.042617797851562, 76.05797576904297, 105.06121826171875, 134.31329345703125, 16.26142692565918, -48.751895904541016, 23.448440551757812, 142.027587890625, 96.15739440917969, 167.8317413330078, 455.558349609375, 111.65048217773438, 52.534454345703125, 271.3188781738281, 156.8024444580078, 57.78551483154297, 229.5826873779297, 150.1607666015625, 204.33303833007812, 90.332763671875, 90.18780517578125, 173.81069946289062, 96.72248840332031, 263.7086486816406, 4.974588394165039, -47.355316162109375, -111.52055358886719, 150.98822021484375, 175.7216339111328, 363.83416748046875, 100.80918884277344, 132.9802703857422, 333.00726318359375, -98.48675537109375, 463.7841796875, 21.279447555541992, 28.04151153564453, 168.4882354736328, 46.333839416503906, 175.75082397460938, 31.093482971191406, 120.95803833007812, 347.18304443359375, 134.28277587890625, 21.927597045898438, 0.42228126525878906, -30.48194694519043, 294.6832275390625, -307.88690185546875, -4.181718826293945, 95.21604919433594, -23.07050132751465, 27.87558937072754, 117.71920776367188, 380.5904846191406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000227.npy"}
|
|
{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 85.70872497558594, "std": 118.69967651367188, "min": -177.1781005859375, "p10": -60.879191589355464, "median": 86.79512786865234, "p90": 229.333171081543, "max": 395.6499328613281, "pos_frac": 0.765625, "sample": [-106.92481231689453, -135.480712890625, -177.1781005859375, 143.14230346679688, 85.14713287353516, 277.6229248046875, -139.71722412109375, 136.6378173828125, 153.38851928710938, -106.03512573242188, 28.52069091796875, 194.56576538085938, 316.26922607421875, 215.53817749023438, 307.35357666015625, 290.95538330078125, 285.9688720703125, 149.3864288330078, 98.99777221679688, -18.60216522216797, -29.331100463867188, 230.51837158203125, 86.0266342163086, 220.81797790527344, 146.81024169921875, 116.01277160644531, 91.59385681152344, 20.227588653564453, -53.536903381347656, 97.73139953613281, 44.89959716796875, 101.05035400390625, 12.924110412597656, 182.87750244140625, 181.28225708007812, 87.5636215209961, 70.6094741821289, 68.72055053710938, 60.96580505371094, 25.120849609375, 74.4330825805664, 168.84242248535156, -30.09674072265625, -7.9100799560546875, 102.48617553710938, 73.28345489501953, 395.6499328613281, 16.14470672607422, 88.67697143554688, 50.18273162841797, 226.5677032470703, -14.524242401123047, -28.09458351135254, 166.42098999023438, 170.6793975830078, 128.624755859375, 148.63685607910156, 93.92867279052734, -64.02588653564453, 13.30804443359375, -11.337379455566406, 40.90045166015625, -82.55955505371094, 2.6987686157226562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000228.npy"}
|
|
{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 96.2049560546875, "std": 120.00987243652344, "min": -268.91168212890625, "p10": -35.321815490722656, "median": 96.63675308227539, "p90": 263.2721954345704, "max": 427.19610595703125, "pos_frac": 0.796875, "sample": [19.206253051757812, 11.498703002929688, -36.13983154296875, 296.5016174316406, 47.99484634399414, -119.37032318115234, 102.8657455444336, 141.99546813964844, 60.26298522949219, 170.09259033203125, 241.36090087890625, 272.6627502441406, 69.76812744140625, 304.890625, 107.77915954589844, 94.08757781982422, 145.0098876953125, 105.53154754638672, 22.167865753173828, 227.30972290039062, -53.30509948730469, 81.58042907714844, 11.17086410522461, 193.88351440429688, 106.97156524658203, 50.91600799560547, 427.19610595703125, -89.02304077148438, 20.565597534179688, 177.337158203125, 165.53330993652344, 210.19699096679688, 188.39694213867188, 272.7421875, 290.2763977050781, 52.63957977294922, 192.0471954345703, 31.519676208496094, 103.4586181640625, -36.031280517578125, 87.25736236572266, 99.18592834472656, 10.982917785644531, 160.98887634277344, -14.685018539428711, 101.8304443359375, 177.86370849609375, 179.71417236328125, -19.1268310546875, -83.34812927246094, 107.69532775878906, 345.46502685546875, 27.419992446899414, 69.13845825195312, 89.06462097167969, 132.52455139160156, -1.1673965454101562, 147.90286254882812, -6.597618103027344, 157.06549072265625, -268.91168212890625, -0.228271484375, -33.66639709472656, 7.199882507324219], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000229.npy"}
|
|
{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 129.74635314941406, "std": 124.8717269897461, "min": -214.30633544921875, "p10": -17.12615509033203, "median": 122.78029251098633, "p90": 321.2072174072266, "max": 390.1956481933594, "pos_frac": 0.84375, "sample": [92.495361328125, 304.6639099121094, -3.228055953979492, 241.5364532470703, 164.80526733398438, 68.66600799560547, -11.956117630004883, 193.2693634033203, 91.17808532714844, 118.24465942382812, 160.85450744628906, 129.18734741210938, -17.033485412597656, 170.59490966796875, 43.401519775390625, 70.54202270507812, -35.23631286621094, 77.41651916503906, 43.1357421875, 96.71658325195312, 45.0755615234375, -83.02237701416016, -17.165870666503906, 147.39288330078125, 119.85124969482422, 134.564208984375, 174.68692016601562, 287.44207763671875, 216.5078125, 137.64132690429688, 77.42349243164062, 24.445838928222656, 179.30372619628906, 355.4236755371094, 134.31455993652344, -53.34899139404297, 111.29280853271484, 154.4110107421875, 199.39566040039062, 372.3280029296875, -34.90724563598633, 5.9034576416015625, -214.30633544921875, -20.34221649169922, 11.742584228515625, 30.959304809570312, 314.5953063964844, 99.94027709960938, 324.0408935546875, 349.51995849609375, 168.65773010253906, 86.37890625, 91.68473815917969, 34.73565673828125, 309.943115234375, 204.6089324951172, 380.1017150878906, 125.70933532714844, 328.1416931152344, 171.18174743652344, 196.9370574951172, 100.44805145263672, 130.67921447753906, 390.1956481933594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000230.npy"}
|
|
{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 94.08336639404297, "std": 111.48593139648438, "min": -78.64169311523438, "p10": -21.852981948852534, "median": 77.29290008544922, "p90": 246.6435745239259, "max": 462.9366149902344, "pos_frac": 0.828125, "sample": [371.5531005859375, 100.44479370117188, -17.835472106933594, 189.49853515625, 17.776466369628906, 76.93763732910156, 260.7441101074219, 49.57568359375, 53.81609344482422, 119.9648208618164, 51.10382080078125, 206.11480712890625, -44.4858283996582, 77.64816284179688, 373.2308044433594, 95.359130859375, 169.89520263671875, 132.31700134277344, 116.55272674560547, 178.5345458984375, 264.8733825683594, 50.085880279541016, 82.7816390991211, 55.67854309082031, 51.061668395996094, -29.691246032714844, 279.92486572265625, -46.41630935668945, 170.17913818359375, 462.9366149902344, 366.6675720214844, 162.97543334960938, 106.51309967041016, 10.51161003112793, -66.44080352783203, 55.00791549682617, 83.39315795898438, 152.24209594726562, 59.139923095703125, -15.45947265625, -2.9286346435546875, 42.363372802734375, 153.06578063964844, 47.90776062011719, 4.106178283691406, 80.35973358154297, -11.859542846679688, -78.64169311523438, 25.136554718017578, 118.35830688476562, 213.74232482910156, 120.51724243164062, 52.74231719970703, 25.885677337646484, -23.574771881103516, 102.27975463867188, -66.88217163085938, 89.79843139648438, 21.424118041992188, 13.730682373046875, 15.21026611328125, 112.74642944335938, 99.25450134277344, 31.882186889648438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000231.npy"}
|
|
{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 77.80187225341797, "std": 120.62252807617188, "min": -112.61262512207031, "p10": -49.44564743041992, "median": 51.68790054321289, "p90": 242.06394348144542, "max": 430.42510986328125, "pos_frac": 0.75, "sample": [125.51608276367188, 99.08171844482422, 1.7286624908447266, 50.12348175048828, 13.60009765625, -50.71504211425781, 281.2246398925781, 166.87924194335938, -16.06798553466797, 210.62548828125, 38.455535888671875, 292.50177001953125, 72.20697784423828, -46.483726501464844, -17.414886474609375, 50.929283142089844, -25.7333984375, 151.55374145507812, -85.68177795410156, 74.66773223876953, 118.32261657714844, 415.72943115234375, 23.55572509765625, 58.368621826171875, 96.47876739501953, -4.673229217529297, 430.42510986328125, 12.72930908203125, 19.80810546875, 217.55264282226562, 128.037841796875, 177.31649780273438, -71.2762451171875, 52.44651794433594, -71.11283874511719, 90.07565307617188, -84.34680938720703, 13.219253540039062, -16.213226318359375, 422.27252197265625, -1.669281005859375, -11.492393493652344, 26.339927673339844, -112.61262512207031, 25.432910919189453, 70.33072662353516, 60.52984619140625, -101.36656951904297, 182.9436492919922, 103.14710998535156, 279.807861328125, 27.69540023803711, 21.84210205078125, 252.56878662109375, 197.859375, 15.439374923706055, 129.08905029296875, 152.67808532714844, -13.110418319702148, 44.52196502685547, 27.655460357666016, 55.19797134399414, 65.25404357910156, 65.52318572998047], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000232.npy"}
|
|
{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 120.17196655273438, "std": 157.10250854492188, "min": -138.62652587890625, "p10": -41.996877670288086, "median": 80.6141128540039, "p90": 340.8126495361328, "max": 553.37451171875, "pos_frac": 0.765625, "sample": [18.074357986450195, 18.549800872802734, 393.78826904296875, -8.497940063476562, 135.7777557373047, 169.91452026367188, -120.97157287597656, -75.73717498779297, 7.0110626220703125, -126.60760498046875, 81.78791809082031, 53.21809387207031, 117.33393859863281, 209.71145629882812, 214.27276611328125, 553.37451171875, 122.48841094970703, 65.56672668457031, 361.1739501953125, 259.10516357421875, 46.7188720703125, 136.9492645263672, 71.30416870117188, 390.62359619140625, 7.584877014160156, 211.7892608642578, 15.5845947265625, -41.26995849609375, -109.56584930419922, -17.811359405517578, 166.61126708984375, 343.5240478515625, 172.86196899414062, 144.44973754882812, 249.92909240722656, 334.4860534667969, -14.430582046508789, 327.2572021484375, -4.289787292480469, 47.94868469238281, 109.05846405029297, 327.74267578125, 3.0965919494628906, 218.3865966796875, 53.32627868652344, 244.2901611328125, -18.112041473388672, -138.62652587890625, -85.61747741699219, 9.99969482421875, 41.11956024169922, 17.803916931152344, 281.35675048828125, 79.4403076171875, -39.370452880859375, 151.63954162597656, -23.437686920166016, 384.40191650390625, -42.308414459228516, 498.79052734375, 278.7870788574219, 63.76802062988281, 191.33734130859375, 154.54302978515625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000233.npy"}
|
|
{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 116.25048828125, "std": 117.8036880493164, "min": -110.21624755859375, "p10": -26.88932113647461, "median": 112.17641830444336, "p90": 285.6004974365235, "max": 468.1585388183594, "pos_frac": 0.828125, "sample": [-62.95337677001953, 209.39476013183594, 192.91738891601562, 136.88507080078125, 212.2361602783203, 128.4949951171875, 116.37814331054688, 150.27694702148438, 320.72857666015625, 4.612123489379883, 162.75836181640625, 101.92430114746094, -110.21624755859375, 23.08560562133789, 40.28947448730469, 188.13430786132812, 50.71919250488281, -49.934425354003906, 50.03131103515625, 216.54339599609375, 468.1585388183594, 144.867919921875, 19.3525447845459, -13.084266662597656, 45.647911071777344, -7.1743621826171875, 160.92776489257812, 236.69537353515625, 15.602104187011719, 298.1341552734375, -0.0067844390869140625, 122.74371337890625, 273.4356689453125, 114.10466766357422, 323.9119873046875, 68.84468078613281, 60.58655548095703, 151.085205078125, 84.06683349609375, 165.8644561767578, 180.64285278320312, 130.14451599121094, 188.9954833984375, 65.5655517578125, 29.091285705566406, 74.02262878417969, -30.037353515625, -26.724388122558594, 174.27029418945312, 91.7899169921875, 70.06159210205078, 313.4393310546875, 418.8869323730469, -34.98450469970703, 218.70626831054688, 107.98992919921875, -26.960006713867188, 14.403684616088867, 131.03639221191406, 38.73585510253906, 110.2481689453125, -62.25761413574219, 290.8139953613281, 156.07940673828125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000234.npy"}
|
|
{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 76.87075805664062, "std": 118.8454818725586, "min": -210.042724609375, "p10": -38.05217514038086, "median": 62.0001220703125, "p90": 224.13722991943365, "max": 417.25482177734375, "pos_frac": 0.75, "sample": [32.33930969238281, 194.97738647460938, -210.042724609375, 15.091903686523438, 178.22984313964844, 417.25482177734375, 107.70560455322266, 256.1976013183594, -51.14719009399414, 138.3723907470703, -120.19065856933594, -38.07362365722656, 26.45147705078125, 173.11412048339844, 59.037628173828125, -6.3508148193359375, -4.1014862060546875, 9.676704406738281, 88.64942932128906, -154.76841735839844, 20.009395599365234, 15.1075439453125, -22.184295654296875, 26.20838165283203, 249.75296020507812, 158.1526641845703, 20.568382263183594, -28.637664794921875, 116.95845794677734, 228.84535217285156, 123.80758666992188, 79.41191101074219, 213.151611328125, -24.220596313476562, 112.33831787109375, 159.318603515625, 25.868772506713867, 145.13400268554688, 47.82173156738281, 50.31500244140625, 24.448848724365234, 14.007888793945312, 356.4865417480469, 139.5523681640625, 154.4724578857422, 148.331298828125, 158.72930908203125, -22.15874481201172, -123.2787094116211, 64.96261596679688, 348.22283935546875, 195.9033203125, -59.191505432128906, 8.396820068359375, 81.23965454101562, -20.140363693237305, -38.00212860107422, 115.71723937988281, 68.80340576171875, 15.157747268676758, 143.01583862304688, 77.56069946289062, 260.4107971191406, -23.071395874023438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000235.npy"}
|
|
{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 121.25318145751953, "std": 107.75322723388672, "min": -50.204307556152344, "p10": 0.2720298767089886, "median": 96.37862014770508, "p90": 262.8299621582031, "max": 395.1072692871094, "pos_frac": 0.890625, "sample": [302.97247314453125, 72.25634002685547, -4.57122802734375, 118.92842864990234, 166.98997497558594, 88.31336975097656, 295.131591796875, 41.06843566894531, 64.15090942382812, 99.39105224609375, 127.84616088867188, 262.2475280761719, 95.21530151367188, 141.43606567382812, 185.19528198242188, 229.64669799804688, 54.49507141113281, 216.82875061035156, 9.059654235839844, 262.809326171875, 95.80900573730469, -4.841278076171875, 43.04711151123047, 232.97634887695312, 226.64614868164062, 21.70602798461914, 85.0385971069336, 295.16796875, 196.62484741210938, 52.38841247558594, 395.1072692871094, 87.77837371826172, -1.5383453369140625, 183.51060485839844, 12.486705780029297, 192.25775146484375, 10.033821105957031, 353.822265625, 67.77722930908203, 5.671031951904297, 101.42151641845703, 61.38794708251953, 25.214635848999023, 18.667884826660156, 5.738109588623047, 202.94142150878906, 75.95024108886719, 48.26325225830078, -50.204307556152344, -25.9903564453125, 123.56851196289062, 353.5870666503906, 262.83880615234375, 80.73979187011719, 192.5167999267578, 110.58425903320312, 232.50973510742188, 96.94823455810547, 4.496238708496094, 126.81886291503906, -46.70361328125, 245.7720184326172, -14.296772003173828, 146.55270385742188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000236.npy"}
|
|
{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 99.26751708984375, "std": 129.9894256591797, "min": -243.56524658203125, "p10": -39.86185531616211, "median": 89.83023834228516, "p90": 272.01008911132817, "max": 486.17730712890625, "pos_frac": 0.765625, "sample": [119.74577331542969, 172.5629119873047, 217.8607177734375, 186.49456787109375, -3.4096603393554688, 108.85643768310547, 94.63355255126953, 136.7252197265625, 9.564510345458984, 103.95034790039062, 51.75028991699219, 35.34369659423828, 356.3172607421875, 85.02692413330078, 179.16738891601562, -7.9284820556640625, -80.20977783203125, 122.2607421875, -40.49250030517578, 110.68036651611328, 138.81492614746094, -60.04309844970703, 290.8011779785156, 486.17730712890625, -58.434661865234375, 309.1282653808594, 210.73455810546875, 25.2779541015625, 11.409347534179688, 4.830196380615234, 366.42486572265625, 70.86158752441406, 188.35060119628906, 191.09107971191406, 24.27075958251953, 145.8706817626953, 3.9340591430664062, 76.23560333251953, 207.51608276367188, -15.834175109863281, 238.98568725585938, 36.273216247558594, 148.16024780273438, 173.53973388671875, 337.69622802734375, 29.00897216796875, -13.043060302734375, 81.34044647216797, -243.56524658203125, 254.4332275390625, -13.155698776245117, 56.511085510253906, -108.30606079101562, -35.528839111328125, 150.9686279296875, 227.30577087402344, -38.390350341796875, 279.54302978515625, 7.659645080566406, -55.7611083984375, -10.834823608398438, 112.5870132446289, 36.8489990234375, 124.52702331542969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000237.npy"}
|
|
{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 99.44230651855469, "std": 130.80380249023438, "min": -130.09750366210938, "p10": -23.37137222290039, "median": 78.21393966674805, "p90": 226.59968719482424, "max": 569.5369262695312, "pos_frac": 0.765625, "sample": [9.237503051757812, -8.698776245117188, 5.1919403076171875, 119.25874328613281, 26.864826202392578, 4.3434295654296875, 111.07673645019531, 207.2021484375, -43.451751708984375, 294.49859619140625, 72.60961151123047, 156.78082275390625, 83.81826782226562, 43.952640533447266, 14.852910995483398, 128.36993408203125, 220.81912231445312, -38.83195495605469, 46.469635009765625, 51.83208084106445, -3.6736316680908203, 496.9004211425781, 28.433670043945312, 160.04559326171875, -29.331008911132812, 448.9831848144531, 87.43098449707031, 19.397254943847656, 157.47512817382812, -65.44975280761719, 123.97523498535156, 62.231597900390625, 105.04547119140625, 142.48287963867188, 265.4555358886719, 17.58578872680664, 59.248226165771484, 102.15663146972656, 569.5369262695312, 109.16101837158203, -9.996540069580078, -5.2586669921875, 44.51730728149414, -103.4190902709961, -13.368453979492188, 155.19296264648438, 157.47625732421875, 167.2471923828125, 70.02171325683594, 340.0094299316406, 189.17056274414062, -23.38397216796875, 171.1768798828125, 192.8845672607422, -130.09750366210938, 229.0770721435547, 203.341796875, 48.39872741699219, -23.34197235107422, -11.937850952148438, 131.39886474609375, 142.70684814453125, -15.244232177734375, 94.4486312866211], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000238.npy"}
|
|
{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 107.66412353515625, "std": 121.94551086425781, "min": -163.82174682617188, "p10": -26.64986953735351, "median": 97.65554809570312, "p90": 293.42213134765626, "max": 375.83154296875, "pos_frac": 0.8125, "sample": [330.1780090332031, 177.8176727294922, -62.68394470214844, 292.8042907714844, -61.435272216796875, 105.25, 85.3365478515625, 110.90158081054688, -20.54897689819336, -111.86837768554688, 40.6761360168457, 35.58363342285156, 53.26120376586914, 215.30728149414062, 61.27123260498047, 1.3191986083984375, 351.2783508300781, 108.25059509277344, 146.36856079101562, 132.72598266601562, 202.54966735839844, 142.60035705566406, 193.88233947753906, -51.154964447021484, 8.306999206542969, 32.16981506347656, 147.72268676757812, 293.6869201660156, 144.70664978027344, 86.35090637207031, -0.7106227874755859, 20.565643310546875, 297.6456298828125, 307.724365234375, -108.77409362792969, 50.71692657470703, -0.8291244506835938, 283.2010192871094, -13.775375366210938, 254.3951416015625, 46.704105377197266, 262.32501220703125, 175.50079345703125, 131.18411254882812, 14.865188598632812, 39.11444854736328, -163.82174682617188, 60.81095886230469, -29.264537811279297, 124.41242980957031, 133.34300231933594, 143.2655029296875, 232.91856384277344, 53.635311126708984, 375.83154296875, 172.20654296875, 336.12713623046875, 61.70634841918945, 90.06109619140625, 106.50867462158203, 52.767669677734375, -12.201133728027344, 172.0946044921875, 27.63387107849121], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000239.npy"}
|
|
{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 97.0358657836914, "std": 138.20411682128906, "min": -168.30294799804688, "p10": -109.94934539794922, "median": 111.98036575317383, "p90": 297.5318908691406, "max": 404.2912902832031, "pos_frac": 0.78125, "sample": [111.89209747314453, 131.72320556640625, 142.00112915039062, 59.602378845214844, 113.23558044433594, 299.90411376953125, 98.53752136230469, 305.8105163574219, -111.65341186523438, -49.17266082763672, 176.63539123535156, 114.06053161621094, -134.14620971679688, 162.16717529296875, 291.9967041015625, 70.94266510009766, -129.27789306640625, 41.71380615234375, 25.219512939453125, 205.4508056640625, 142.06289672851562, 319.00823974609375, 126.03358459472656, 125.9493179321289, 52.995208740234375, 18.396862030029297, -168.30294799804688, 83.13260650634766, 96.07620239257812, 375.7318115234375, 182.2483673095703, 95.92166137695312, 113.74360656738281, 45.315269470214844, -134.80902099609375, 275.6190490722656, 186.87498474121094, 176.27481079101562, -134.65728759765625, 70.55782318115234, -47.19917678833008, 224.09947204589844, -97.46470642089844, 53.55752182006836, 404.2912902832031, -105.97319030761719, 228.70220947265625, -32.464820861816406, 221.40390014648438, 141.9680938720703, 112.06863403320312, 315.63958740234375, 3.007476806640625, 102.52571868896484, -91.79576110839844, 112.5809555053711, 88.06847381591797, -31.111722946166992, 118.66217041015625, 140.6575469970703, 38.61500549316406, 130.69781494140625, 366.4242858886719, -161.48097229003906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000240.npy"}
|
|
{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 148.89053344726562, "std": 163.06895446777344, "min": -195.0648651123047, "p10": -19.695962524414064, "median": 125.12089920043945, "p90": 338.80834045410165, "max": 684.164794921875, "pos_frac": 0.84375, "sample": [82.20230865478516, 94.26148986816406, -111.61194610595703, -25.993722915649414, 12.936248779296875, 203.11788940429688, 184.19430541992188, 346.674560546875, 92.97689819335938, 128.17019653320312, 101.7908935546875, -19.473495483398438, -4.088718414306641, 429.64691162109375, -19.791305541992188, 141.75918579101562, -17.854015350341797, 269.819091796875, 165.61099243164062, 150.63868713378906, 282.85675048828125, 155.59121704101562, 87.27989959716797, 96.63069152832031, 684.164794921875, 320.4538269042969, 164.40185546875, 122.07160186767578, 497.6209411621094, 26.085968017578125, 624.9805297851562, 388.87322998046875, 282.1067199707031, 70.63460540771484, 67.36747741699219, 91.27201843261719, 180.24777221679688, 96.5445327758789, 9.57025146484375, 137.19871520996094, 117.24405670166016, 64.60671997070312, 101.25982666015625, 7.5113372802734375, 227.49993896484375, 64.96102905273438, 227.29531860351562, -31.918758392333984, 201.93075561523438, 183.85394287109375, 58.45683288574219, 54.588706970214844, 202.04440307617188, 428.20147705078125, 221.99461364746094, 139.736328125, -155.20001220703125, 247.9810333251953, 229.62265014648438, 232.05258178710938, -195.0648651123047, 316.1312561035156, 63.63302993774414, -70.36846923828125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000241.npy"}
|
|
{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 106.50521850585938, "std": 134.48931884765625, "min": -170.93572998046875, "p10": -23.46363754272461, "median": 82.6564826965332, "p90": 266.3873123168946, "max": 553.6163940429688, "pos_frac": 0.859375, "sample": [-22.990478515625, 49.11664962768555, 132.14422607421875, 176.7176055908203, 51.490169525146484, -170.93572998046875, 22.434432983398438, 0.09738922119140625, 248.3400421142578, 87.25871276855469, 26.02547264099121, 182.9995880126953, 476.22509765625, 24.54365348815918, 10.380027770996094, 60.075233459472656, 88.06322479248047, 96.51104736328125, 69.283447265625, 8.331876754760742, 197.43612670898438, 59.163482666015625, 208.32286071777344, 174.85638427734375, 209.7310791015625, -24.23986053466797, 125.8468246459961, 81.34375762939453, 101.33654022216797, -46.450714111328125, 130.5040283203125, 175.8675079345703, 82.40094757080078, -95.2332763671875, 228.09164428710938, 72.24488067626953, 274.1218566894531, 367.29742431640625, 32.15306854248047, 35.05633544921875, 75.77589416503906, 153.7187042236328, 7.9205780029296875, 553.6163940429688, 25.95325469970703, 65.62908172607422, 397.7630615234375, 84.99085998535156, 11.215957641601562, 21.005525588989258, -81.74724578857422, 375.97772216796875, 146.92050170898438, -20.97412872314453, 91.35122680664062, 82.91201782226562, 115.42449951171875, -104.20613098144531, 192.72789001464844, 144.93678283691406, 4.439447402954102, 342.45941162109375, 146.22679138183594, -23.666419982910156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000242.npy"}
|
|
{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 132.05882263183594, "std": 143.0041961669922, "min": -197.5450439453125, "p10": -25.266737556457517, "median": 110.03360366821289, "p90": 310.3403472900392, "max": 603.6286010742188, "pos_frac": 0.84375, "sample": [244.02528381347656, 17.175533294677734, 74.40467071533203, 382.81585693359375, 172.4849853515625, 101.54680633544922, 91.26548767089844, 167.44285583496094, -71.69091796875, -74.98844146728516, -57.822418212890625, 64.53057861328125, 227.33346557617188, 51.09803009033203, -81.61735534667969, 51.379364013671875, -26.34222412109375, 268.4967041015625, 118.52040100097656, 129.15081787109375, -197.5450439453125, 35.64839172363281, 141.9752197265625, 29.548683166503906, 17.353723526000977, 259.489990234375, 44.66492462158203, 128.308349609375, 94.95360565185547, 322.83734130859375, 251.33477783203125, 265.4964904785156, -22.75726890563965, 139.36964416503906, -26.4901123046875, 274.572021484375, 212.4947509765625, 414.3752746582031, 252.66822814941406, 222.55850219726562, 275.8456115722656, 70.52790069580078, 360.4084777832031, 603.6286010742188, 335.72412109375, 61.89002990722656, 89.63623809814453, 22.444690704345703, 281.1806945800781, -16.323562622070312, 127.00830078125, 168.34597778320312, 30.59735107421875, 128.85647583007812, 424.8379821777344, 35.976722717285156, 135.22921752929688, 79.89576721191406, -1.7278900146484375, 215.07086181640625, 41.24006652832031, 46.28636169433594, 53.45018005371094, 171.6676025390625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000243.npy"}
|
|
{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 125.82269287109375, "std": 122.1434555053711, "min": -118.03031921386719, "p10": -15.207788085937496, "median": 112.3343276977539, "p90": 305.15094604492185, "max": 412.46038818359375, "pos_frac": 0.84375, "sample": [109.32084655761719, 0.29907989501953125, 76.52133178710938, 128.51788330078125, 16.116130828857422, 45.124271392822266, 355.8081359863281, 133.51898193359375, -39.6192626953125, 23.67919921875, 160.2683563232422, 171.22323608398438, -21.8504638671875, 238.06495666503906, 36.411102294921875, 129.6201629638672, -1.9277915954589844, 375.16021728515625, 23.292102813720703, 88.80683135986328, -118.03031921386719, 105.60617065429688, 249.12461853027344, -57.461273193359375, 169.56689453125, 29.240678787231445, 103.67850494384766, 206.61053466796875, 93.74702453613281, 12.617599487304688, 199.1569061279297, 106.5807876586914, 244.6722412109375, 260.6007080078125, 356.49102783203125, 364.55963134765625, 171.0626678466797, 305.1824035644531, 39.30500030517578, 412.46038818359375, 194.16249084472656, -10.970342636108398, 279.5045166015625, 139.02737426757812, 129.98561096191406, -49.742706298828125, 84.51522064208984, -17.023836135864258, 13.431655883789062, 161.0155029296875, -1.7722015380859375, 45.320716857910156, 134.60887145996094, 76.37100982666016, 39.0279541015625, 88.94947814941406, 305.0775451660156, 167.72109985351562, -37.58403396606445, 115.34780883789062, 123.85755920410156, 350.75872802734375, 159.79608154296875, 258.13885498046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000244.npy"}
|
|
{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 110.26097106933594, "std": 147.71131896972656, "min": -303.2307434082031, "p10": -20.022031211853026, "median": 79.41214370727539, "p90": 280.6187316894532, "max": 534.865966796875, "pos_frac": 0.8125, "sample": [72.08170318603516, 374.58831787109375, 148.28414916992188, 129.87171936035156, 139.9593505859375, 126.9715576171875, 58.024818420410156, -13.631729125976562, 204.2305908203125, 95.10806274414062, 60.382537841796875, 92.34217834472656, 164.42799377441406, 58.29208755493164, 80.2239761352539, 157.08041381835938, 197.48358154296875, 250.03648376464844, 78.60031127929688, 39.52705001831055, 449.1502380371094, 118.49400329589844, -18.979188919067383, 303.44195556640625, 35.095603942871094, 6.9607086181640625, 141.09666442871094, -28.866878509521484, -156.82948303222656, -4.566688537597656, 239.30995178222656, 136.32569885253906, 534.865966796875, -96.49598693847656, 160.56918334960938, 18.50933074951172, 19.17211151123047, 76.77864074707031, 259.1153259277344, 83.93415069580078, -7.644222259521484, 130.00440979003906, 162.6527862548828, -20.468963623046875, 41.94239044189453, -303.2307434082031, 241.38182067871094, 260.5076599121094, 47.251487731933594, 155.93678283691406, 289.2377624511719, 10.748260498046875, 512.3894653320312, -32.98580551147461, -18.510997772216797, 452.972900390625, 70.93362426757812, 135.71554565429688, 14.483627319335938, -75.73367309570312, 75.15142822265625, 37.41639709472656, 67.06432342529297, 18.519187927246094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000245.npy"}
|
|
{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 102.74065399169922, "std": 140.1747283935547, "min": -173.40025329589844, "p10": -52.44059906005858, "median": 63.465423583984375, "p90": 295.0213439941407, "max": 576.7106323242188, "pos_frac": 0.84375, "sample": [114.91873168945312, 300.5536193847656, 101.8197021484375, 37.375160217285156, 311.07928466796875, 175.50265502929688, 576.7106323242188, -117.1655502319336, 39.865352630615234, -173.40025329589844, 76.2650375366211, 33.216651916503906, 51.9310188293457, 6.745546340942383, 155.15908813476562, 301.51934814453125, 281.82269287109375, 27.967918395996094, 43.539031982421875, 56.83788299560547, 36.25579833984375, 330.3367919921875, 138.6215362548828, -65.95455169677734, 96.54495239257812, -59.00511169433594, 177.47369384765625, -24.0943603515625, 265.7384033203125, -82.48795318603516, 4.844247817993164, 7.259468078613281, 80.36410522460938, 64.48782348632812, 240.25741577148438, 45.26451873779297, 40.504974365234375, 29.972888946533203, 25.813491821289062, 160.33477783203125, 282.1127014160156, 133.3482666015625, 31.144508361816406, 184.79698181152344, 62.443023681640625, 112.0557861328125, 74.5225830078125, -41.14610290527344, 3.184307098388672, 157.0722198486328, -76.58763122558594, 65.930908203125, 228.49411010742188, 119.89405822753906, 43.28790283203125, 46.75300598144531, 8.609594345092773, 497.29547119140625, 230.73342895507812, 382.30694580078125, -15.538337707519531, 185.86936950683594, -57.281097412109375, 1.3034400939941406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000246.npy"}
|
|
{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 146.94192504882812, "std": 149.2831573486328, "min": -237.87506103515625, "p10": 1.2037830352783205, "median": 115.24252319335938, "p90": 337.39176635742194, "max": 557.0489501953125, "pos_frac": 0.90625, "sample": [-176.03598022460938, 58.29986572265625, -12.649337768554688, 343.44232177734375, 75.73646545410156, 390.2333984375, 155.3203582763672, 288.4188232421875, 13.367731094360352, 273.88079833984375, 320.74652099609375, 401.9101867675781, 148.16024780273438, 271.982421875, 44.08891296386719, 306.1114501953125, 370.1875, 23.206336975097656, 287.771240234375, 279.657958984375, 117.11386108398438, 29.741456985473633, 71.06283569335938, 91.37957000732422, 4.033699035644531, 206.5060272216797, 19.11443328857422, 100.45060729980469, 98.06658172607422, 87.85865783691406, 193.08987426757812, 238.26417541503906, 64.7732925415039, 112.22319030761719, 42.443328857421875, 323.2738037109375, 312.5481872558594, 242.06787109375, 122.84498596191406, -73.68309783935547, 1.1428489685058594, 158.6969757080078, 1.3459625244140625, 117.58787536621094, 54.699241638183594, 25.17894744873047, 113.37118530273438, 281.73486328125, 430.39263916015625, 92.67996215820312, 47.97803497314453, -63.492828369140625, 105.17189025878906, 557.0489501953125, 162.65505981445312, -0.39780426025390625, 150.72586059570312, 250.20350646972656, 174.4004364013672, -237.87506103515625, 404.58551025390625, 12.603744506835938, 98.06007385253906, 198.77496337890625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000247.npy"}
|
|
{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 126.5531005859375, "std": 108.51148986816406, "min": -63.46198654174805, "p10": -1.48553829193115, "median": 108.29126358032227, "p90": 275.19092102050786, "max": 385.9736328125, "pos_frac": 0.890625, "sample": [249.89520263671875, 312.2289733886719, 65.47679138183594, 177.97149658203125, 143.4713592529297, 0.8879165649414062, 76.92996215820312, 278.7345886230469, 102.85932922363281, 2.9537353515625, 9.059951782226562, 210.92095947265625, 156.17123413085938, 17.059675216674805, 167.51853942871094, 28.95063018798828, 27.77800178527832, -2.5027332305908203, 112.85518646240234, 25.511943817138672, 98.87859344482422, 142.1097412109375, 237.08038330078125, 322.7865905761719, 33.83544921875, -20.831527709960938, 126.41727447509766, 238.7240753173828, 138.6316375732422, 213.40646362304688, 133.6364288330078, -40.62803649902344, 127.3578872680664, 263.43707275390625, 96.54495239257812, 155.05101013183594, 42.89744567871094, 19.857955932617188, 53.34431457519531, 49.3785514831543, 159.21604919433594, 250.32618713378906, 164.62591552734375, 340.9371643066406, 89.44529724121094, 74.93392181396484, 385.9736328125, 260.3143005371094, -3.669097900390625, 309.1243896484375, 8.215736389160156, 90.82740783691406, 282.517333984375, 184.89219665527344, 223.97946166992188, 266.92236328125, -5.048744201660156, 52.49858093261719, 103.72734069824219, 235.287841796875, 62.05046844482422, 43.60536575317383, -14.491741180419922, -63.46198654174805], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000248.npy"}
|
|
{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 76.4837646484375, "std": 138.95516967773438, "min": -302.75115966796875, "p10": -77.1719886779785, "median": 85.08627700805664, "p90": 241.8288772583008, "max": 533.0252075195312, "pos_frac": 0.671875, "sample": [-64.20283508300781, 236.26649475097656, -30.359176635742188, -29.99095344543457, 87.52611541748047, -64.4391860961914, 43.8983154296875, 125.94926452636719, 92.93949890136719, 303.1148986816406, -0.018829345703125, 69.73129272460938, 40.02003479003906, 70.26522064208984, 123.19322204589844, -302.75115966796875, -52.45273971557617, 262.9686279296875, 129.86155700683594, -60.10416793823242, 126.41912841796875, 127.13667297363281, -89.84810638427734, 120.21300506591797, -8.648681640625, 266.7689514160156, -91.02485656738281, 232.88136291503906, 164.2252197265625, 137.5584259033203, -16.560176849365234, 15.87094497680664, 190.95477294921875, 21.029586791992188, 82.64643859863281, 0.0937042236328125, 133.71426391601562, -174.57777404785156, 136.75357055664062, 215.72769165039062, 190.95449829101562, -9.8985595703125, 533.0252075195312, -64.94523620605469, -175.44659423828125, -107.62326049804688, 88.7100830078125, -18.521961212158203, 129.0787353515625, 148.6674346923828, 133.49618530273438, 205.78515625, -82.41202545166016, 221.34231567382812, 240.31024169921875, 242.47972106933594, 47.324501037597656, -61.36688232421875, -14.806175231933594, 114.97471618652344, 7.215278625488281, 271.8087463378906, 274.37274169921875, 7.686332702636719], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000249.npy"}
|
|
{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 129.26380920410156, "std": 110.03781127929688, "min": -123.96968841552734, "p10": -18.074092102050763, "median": 119.87617874145508, "p90": 294.7495391845703, "max": 374.81988525390625, "pos_frac": 0.890625, "sample": [228.71231079101562, 71.3077392578125, 30.11602783203125, -28.768630981445312, -68.91865539550781, 158.65150451660156, 91.36090850830078, 296.7498474121094, 121.84365844726562, 0.8264312744140625, 84.59343719482422, 374.81988525390625, 120.3189697265625, 144.6391143798828, 179.53921508789062, 109.44015502929688, 41.21167755126953, 143.67083740234375, 306.40252685546875, 251.92388916015625, 108.40716552734375, -46.52735137939453, -31.18500518798828, 143.5271453857422, 67.26932525634766, 183.13946533203125, 87.56783294677734, 33.41046905517578, 74.6743392944336, 73.50201416015625, 26.688695907592773, 21.268754959106445, 84.77597045898438, 265.87176513671875, -26.17431640625, 366.1907958984375, 166.97340393066406, 107.51830291748047, -50.615440368652344, 22.364356994628906, 124.16740417480469, 290.0821533203125, 195.6830291748047, 185.4006805419922, 113.6473388671875, 59.988487243652344, 187.62075805664062, 119.43338775634766, 188.9971466064453, 97.29058837890625, 103.08642578125, 30.59674835205078, 150.78794860839844, 238.15036010742188, -123.96968841552734, 203.35159301757812, 319.6458435058594, 224.2697296142578, 140.90643310546875, 333.8790588378906, 182.0876922607422, 56.613128662109375, 301.7584228515625, 212.3207244873047], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000250.npy"}
|
|
{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 95.0240478515625, "std": 139.0580291748047, "min": -262.22930908203125, "p10": -62.91937561035154, "median": 83.13677215576172, "p90": 273.07224426269534, "max": 486.2834777832031, "pos_frac": 0.78125, "sample": [318.6758117675781, 8.653762817382812, 306.0689697265625, 30.79979705810547, 193.27157592773438, -106.79344177246094, -262.22930908203125, 486.2834777832031, 34.42613983154297, 168.25534057617188, 171.31024169921875, -85.9010238647461, 251.4228057861328, 48.82817077636719, 75.47013854980469, -38.9866943359375, -217.41452026367188, 361.7471008300781, -17.353689193725586, 137.03941345214844, 18.785457611083984, 228.9481658935547, 1.2066574096679688, 37.003211975097656, -9.54730224609375, 92.48020935058594, -73.68550109863281, -73.40103149414062, 133.48233032226562, 273.44293212890625, 18.87054443359375, -73.17623901367188, 272.2073059082031, -8.35040283203125, 70.64087677001953, 176.84884643554688, 47.250144958496094, 110.10633087158203, 105.42752075195312, 130.69924926757812, -8.06390380859375, 150.6066131591797, 10.789831161499023, 242.66990661621094, 0.9532623291015625, 295.7554626464844, 135.06019592285156, 168.12742614746094, 62.975196838378906, 96.5671157836914, -25.080739974975586, 24.91626739501953, 441.4683837890625, 109.7445068359375, 234.87896728515625, 196.885498046875, -15.884481430053711, 175.40423583984375, 90.80340576171875, 11.886119842529297, 40.53527069091797, 134.67861938476562, 66.30609893798828, 96.74264526367188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000251.npy"}
|
|
{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 125.59085083007812, "std": 111.2602767944336, "min": -352.14337158203125, "p10": 2.26986312866211, "median": 131.75080108642578, "p90": 253.99505920410158, "max": 329.9617004394531, "pos_frac": 0.90625, "sample": [256.2035217285156, 264.6473083496094, 77.71150207519531, 189.88043212890625, 222.2344512939453, 46.8206787109375, 155.930419921875, 237.33175659179688, 77.25143432617188, 173.21649169921875, 138.92713928222656, 34.433570861816406, 149.6881103515625, 35.061126708984375, 108.42420959472656, 147.0352783203125, 82.09428405761719, 81.94731140136719, 173.63632202148438, 2.7290725708007812, 228.9364776611328, 163.6788787841797, 16.668804168701172, 288.2455749511719, 220.11563110351562, -68.40956115722656, -14.347391128540039, 114.97201538085938, 2.07305908203125, 132.85255432128906, -9.054264068603516, 131.07540893554688, 70.08683776855469, 180.66311645507812, 113.32958984375, 99.9810791015625, 104.60238647460938, 302.88330078125, 329.9617004394531, 273.6762390136719, 56.447471618652344, 99.7132568359375, 144.86582946777344, 36.32850646972656, 149.84234619140625, 248.5641632080078, 248.84197998046875, 181.2048797607422, 131.2670135498047, 202.6514892578125, 3.6310272216796875, -352.14337158203125, 245.5872039794922, 246.38954162597656, 71.49334716796875, 71.78826904296875, -6.611024856567383, 132.23458862304688, 324.369140625, -11.42391586303711, 69.67567443847656, 46.719398498535156, 205.233642578125, 153.94790649414062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000252.npy"}
|
|
{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 116.15472412109375, "std": 139.11810302734375, "min": -314.1762390136719, "p10": -35.696011734008785, "median": 118.35677719116211, "p90": 290.14511718750003, "max": 481.26153564453125, "pos_frac": 0.796875, "sample": [-33.38861846923828, 20.320167541503906, 212.35440063476562, 24.994705200195312, 12.343475341796875, 134.15048217773438, 194.02340698242188, 80.31768035888672, 211.05091857910156, 90.81526184082031, 92.41923522949219, 481.26153564453125, 184.79620361328125, 49.239830017089844, 141.003662109375, 136.68218994140625, 316.63665771484375, 73.47195434570312, -46.15074157714844, 90.8175277709961, -17.849912643432617, -314.1762390136719, 161.47161865234375, 310.5138244628906, 251.56576538085938, 8.77192497253418, 387.41448974609375, -15.887413024902344, -9.296463012695312, 28.914714813232422, 14.089622497558594, -30.373838424682617, 123.78489685058594, 201.04953002929688, 269.0256042480469, 202.42706298828125, 188.11126708984375, -94.81110382080078, 269.99176025390625, 112.62430572509766, 202.41485595703125, 293.2027587890625, 116.12206268310547, 242.8466796875, 283.0106201171875, 80.16412353515625, 162.72596740722656, 324.994140625, 92.73775482177734, -193.29550170898438, 207.10366821289062, 143.71456909179688, -66.46670532226562, 59.618019104003906, -39.06549072265625, 64.6931381225586, 131.03456115722656, 156.87474060058594, 200.26841735839844, -28.356414794921875, -36.68489456176758, 39.384857177734375, 361.7477111816406, 120.59149169921875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000253.npy"}
|
|
{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 106.33760833740234, "std": 167.06167602539062, "min": -159.5920867919922, "p10": -73.33864669799804, "median": 79.90167617797852, "p90": 313.2072021484375, "max": 640.9267578125, "pos_frac": 0.734375, "sample": [57.28131103515625, -29.040985107421875, 139.5233154296875, 75.7264175415039, 93.95211029052734, 121.34133911132812, -24.264236450195312, -73.17029571533203, 104.20396423339844, -0.5016098022460938, 113.1930923461914, 174.0845489501953, 165.79763793945312, 80.66108703613281, -73.41079711914062, -159.5920867919922, 187.93850708007812, -4.751415252685547, 112.12088012695312, 168.7047119140625, -28.517391204833984, 194.90538024902344, 424.3281555175781, 276.5868225097656, 307.6925964355469, -136.9945068359375, 314.5611572265625, 310.0479736328125, -40.992103576660156, 8.152643203735352, 65.55863189697266, 194.74185180664062, 79.14226531982422, -142.94793701171875, 20.597354888916016, 192.16281127929688, -42.84955596923828, 383.1456298828125, -101.08551025390625, 43.43678665161133, -10.939323425292969, -157.5777587890625, 49.69468688964844, 445.23370361328125, 22.00518226623535, 210.7530975341797, 5.007261276245117, 279.7811279296875, 31.04570960998535, 150.14144897460938, 640.9267578125, 94.67829895019531, 60.680816650390625, 6.835540771484375, -136.11349487304688, 493.585693359375, 64.97738647460938, 191.70693969726562, 166.9158477783203, 87.05201721191406, -62.22760009765625, 477.1805725097656, 130.84034729003906, 11.95220947265625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000254.npy"}
|
|
{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 112.41537475585938, "std": 155.29124450683594, "min": -353.0177001953125, "p10": -36.05047607421875, "median": 90.80997467041016, "p90": 305.0670532226562, "max": 480.0543212890625, "pos_frac": 0.796875, "sample": [452.1204833984375, -83.60711669921875, 5.7796630859375, 245.95701599121094, 143.86676025390625, -25.210220336914062, -32.515281677246094, 0.2253265380859375, -353.0177001953125, 23.643199920654297, 263.2137451171875, 103.30154418945312, 1.8817825317382812, 28.14974594116211, 220.26486206054688, 187.824462890625, -156.88134765625, -213.69317626953125, 127.08056640625, 266.1725769042969, 193.0413818359375, 35.48966979980469, 5.359474182128906, 34.49551773071289, 86.27217102050781, -10.837272644042969, -102.81547546386719, 341.46722412109375, -3.127513885498047, 201.86373901367188, 90.57933044433594, 171.977294921875, 121.98663330078125, 289.1966247558594, 323.90301513671875, 256.6803283691406, 252.40029907226562, 323.52203369140625, 304.540771484375, 480.0543212890625, 24.76691436767578, -23.43519401550293, 259.80084228515625, 133.65512084960938, 18.17013168334961, 154.8873291015625, 91.04061889648438, 189.19784545898438, -115.60447692871094, 305.2926025390625, 355.86895751953125, 76.44207763671875, 118.86812591552734, 40.38184356689453, -24.996910095214844, 84.56158447265625, 84.52655792236328, 77.75077056884766, 34.515037536621094, 147.50045776367188, -37.56555938720703, 77.23094177246094, 252.35745239257812, 268.764404296875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000255.npy"}
|
|
{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 124.12705993652344, "std": 142.98541259765625, "min": -192.03216552734375, "p10": -29.35426445007324, "median": 114.01909637451172, "p90": 307.3527435302735, "max": 492.6063232421875, "pos_frac": 0.8125, "sample": [100.36036682128906, 492.6063232421875, 111.23917388916016, 196.98216247558594, 174.72586059570312, 231.55368041992188, 30.646394729614258, 171.52374267578125, -28.874042510986328, 26.975418090820312, 245.83509826660156, 222.6903076171875, 131.22341918945312, 261.6007995605469, 220.46585083007812, -99.78703308105469, 60.551021575927734, 229.4934844970703, -192.03216552734375, 37.383480072021484, 188.03208923339844, 84.69651794433594, -93.18553161621094, 246.58106994628906, 55.28647232055664, 245.61428833007812, -31.76782989501953, -24.590301513671875, -56.26231384277344, 129.91343688964844, 339.8757629394531, -2.789276123046875, 183.98175048828125, 90.34542846679688, 27.679481506347656, 116.79901885986328, 323.2670593261719, 3.0855560302734375, 70.23330688476562, 46.9774169921875, -140.75901794433594, 9.792266845703125, 21.253517150878906, 37.224273681640625, 106.26904296875, 17.49276351928711, 166.14865112304688, 134.04104614257812, -17.305999755859375, 218.6353759765625, -29.560073852539062, 126.93276977539062, 27.96459197998047, 180.9349365234375, 296.55621337890625, 373.7351989746094, 140.3636932373047, 57.480464935302734, 470.8924560546875, 445.38287353515625, 182.35702514648438, 311.9798278808594, -19.24843406677246, 256.6311340332031], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000256.npy"}
|
|
{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 124.52272033691406, "std": 147.162841796875, "min": -179.28591918945312, "p10": -25.957139205932613, "median": 99.00992584228516, "p90": 351.1807678222657, "max": 519.4136962890625, "pos_frac": 0.796875, "sample": [359.78173828125, -4.784236907958984, 50.91831970214844, 92.29153442382812, 297.62347412109375, 9.689178466796875, 205.923583984375, 309.53814697265625, 211.03439331054688, 148.6922607421875, -16.404165267944336, -40.013343811035156, 83.11903381347656, 267.05841064453125, 180.58987426757812, 43.292724609375, 177.77333068847656, 193.65032958984375, 7.320568084716797, 359.5370788574219, 68.69172668457031, 365.6590881347656, 268.7847900390625, -117.48295593261719, 88.47112274169922, 107.61213684082031, -52.74113082885742, 2.2554092407226562, -21.082584381103516, 396.3023986816406, 173.8455352783203, 203.943603515625, 243.05902099609375, 519.4136962890625, 76.6910171508789, -28.046234130859375, 104.87992858886719, 14.915975570678711, 132.26223754882812, -179.28591918945312, 81.9260482788086, 331.6827087402344, 18.459674835205078, 222.3375244140625, 93.13992309570312, 161.69265747070312, 66.34100341796875, 4.759635925292969, 411.2198486328125, 190.77606201171875, 184.1636962890625, 257.7198486328125, -3.5816879272460938, 106.41997528076172, 47.71672058105469, 405.0174865722656, -56.96599197387695, -10.248615264892578, 120.07258605957031, -153.04432678222656, 26.812026977539062, 9.213409423828125, -16.548446655273438, 165.59149169921875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000257.npy"}
|
|
{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 115.47129821777344, "std": 147.73301696777344, "min": -305.1628723144531, "p10": -42.68547859191894, "median": 108.36536407470703, "p90": 285.8395172119141, "max": 490.7012023925781, "pos_frac": 0.8125, "sample": [241.45327758789062, 28.87751579284668, -46.68880844116211, 238.29400634765625, 60.88232421875, 111.46143341064453, 207.11941528320312, 78.03836059570312, 169.14552307128906, 0.7874908447265625, -31.65668296813965, -27.165260314941406, -14.241338729858398, 77.38734436035156, 81.65377807617188, -18.950820922851562, -57.424415588378906, 101.12646484375, 173.38233947753906, 23.32714080810547, 34.21568298339844, 235.56573486328125, 424.99725341796875, 177.8205108642578, 304.6296081542969, 246.84591674804688, 87.7200927734375, 27.843338012695312, 73.97311401367188, 274.8458557128906, 152.35484313964844, 166.7955322265625, 250.9667205810547, 134.8928985595703, -162.80075073242188, 78.57461547851562, 244.95443725585938, -294.54107666015625, 31.99280548095703, 105.26929473876953, 299.8926086425781, 1.9425697326660156, -33.34437561035156, 290.55108642578125, 38.29066848754883, -305.1628723144531, 490.7012023925781, 172.94723510742188, -105.15159606933594, 120.36608123779297, 152.53277587890625, -48.21504211425781, 228.49827575683594, 252.5550537109375, 336.92877197265625, 60.632591247558594, 255.84825134277344, 322.6959533691406, 160.93792724609375, 219.28733825683594, 64.12318420410156, 69.93062591552734, 222.626220703125, 127.02289581298828], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000258.npy"}
|
|
{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 104.06440734863281, "std": 139.8522186279297, "min": -153.96636962890625, "p10": -65.12140579223632, "median": 69.98472213745117, "p90": 265.5872009277344, "max": 435.50439453125, "pos_frac": 0.765625, "sample": [193.93472290039062, -6.428152084350586, 153.11459350585938, 142.68455505371094, 31.252605438232422, -153.96636962890625, 232.07254028320312, 389.77783203125, 343.21356201171875, 183.2039794921875, 41.73821258544922, -67.2416000366211, 146.90960693359375, -43.821937561035156, 19.00750732421875, -74.55062866210938, 243.18710327148438, -31.881500244140625, 372.516357421875, 162.60073852539062, 110.09048461914062, 110.96923828125, 47.69207763671875, 60.92405319213867, 237.8252716064453, 19.900951385498047, 57.12726593017578, 75.43366241455078, 435.50439453125, -100.15675354003906, 41.043731689453125, 47.687721252441406, 403.90362548828125, 176.74642944335938, 40.098106384277344, -35.91822814941406, 116.51720428466797, 265.66815185546875, -93.51270294189453, 288.188720703125, -91.98262023925781, 17.13713836669922, 260.04974365234375, -5.127254486083984, 64.53578186035156, 33.80604553222656, 265.3983154296875, 247.40972900390625, -139.04286193847656, 103.05238342285156, -13.03516960144043, 259.88385009765625, 250.12420654296875, 1.5129547119140625, 264.71514892578125, -60.174285888671875, 83.09303283691406, 229.1090087890625, 129.53887939453125, -55.201377868652344, 62.425777435302734, 10.880062103271484, 124.94849395751953, 34.008216857910156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000259.npy"}
|
|
{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 124.72775268554688, "std": 133.95245361328125, "min": -148.54498291015625, "p10": 1.3826255798339897, "median": 101.05036926269531, "p90": 304.48128051757817, "max": 588.1268920898438, "pos_frac": 0.890625, "sample": [216.210205078125, -61.684837341308594, 18.83484649658203, -36.25787353515625, 28.38088035583496, 133.45132446289062, 119.56334686279297, 176.4209747314453, 208.75042724609375, 47.157958984375, 50.76173782348633, 425.26934814453125, 82.38055419921875, 78.2506103515625, 117.05766296386719, 588.1268920898438, 190.6891632080078, 139.28323364257812, 13.35101318359375, 350.6561279296875, -148.54498291015625, 102.86016082763672, 16.098024368286133, 177.35887145996094, -70.0789566040039, -26.858924865722656, 158.77989196777344, -5.1136627197265625, 186.40982055664062, 100.17266845703125, 101.51475524902344, 280.526123046875, 7.385154724121094, 160.63584899902344, 114.38948059082031, 297.9666748046875, 328.4553527832031, 84.99791717529297, 100.58598327636719, 62.2725830078125, 33.14350509643555, 68.10545349121094, 256.4881591796875, 27.140357971191406, 125.91899871826172, 42.60256576538086, 6.4500579833984375, 67.48109436035156, 110.65081024169922, 174.28355407714844, 144.23410034179688, 152.16683959960938, 53.74089813232422, 358.7091979980469, 41.40955352783203, -0.7891311645507812, 18.2265625, 257.01495361328125, 13.057559967041016, 471.6151123046875, 56.26403045654297, 307.27325439453125, 38.28291320800781, 242.6393585205078], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000260.npy"}
|
|
{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 128.69786071777344, "std": 146.355224609375, "min": -246.75604248046875, "p10": -28.198505783081046, "median": 117.37301254272461, "p90": 299.0024444580078, "max": 630.7280883789062, "pos_frac": 0.828125, "sample": [293.6250915527344, 297.7511901855469, 42.229331970214844, -106.18019104003906, 96.38493347167969, 630.7280883789062, 56.93058776855469, 349.129638671875, -21.71877670288086, 148.951171875, 58.26698303222656, 62.04933166503906, -40.149505615234375, 299.5386962890625, 84.53538513183594, 263.47406005859375, 200.3834228515625, 245.59628295898438, 315.49676513671875, -30.97553253173828, 120.71792602539062, 74.95449829101562, 146.70106506347656, 136.90228271484375, 163.061767578125, 290.79827880859375, 341.1197509765625, 57.021392822265625, 291.5697326660156, 132.0852813720703, 3.53558349609375, 72.55164337158203, 8.929389953613281, 113.05061340332031, 142.37718200683594, 30.411996841430664, 254.70880126953125, 272.9293212890625, 176.38499450683594, 39.741798400878906, 254.0701904296875, -2.1185874938964844, 301.8955383300781, 294.94659423828125, 18.878347396850586, 221.81167602539062, -81.61880493164062, 166.90811157226562, 6.994880676269531, 190.25161743164062, 121.31368255615234, 391.25762939453125, 40.99437713623047, -246.75604248046875, -151.51290893554688, 101.83586883544922, -63.254852294921875, -8.420671463012695, 114.0280990600586, 196.57701110839844, 58.74651336669922, 70.4978256225586, 126.66618347167969, -2.92901611328125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000261.npy"}
|
|
{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 105.2766342163086, "std": 125.43441009521484, "min": -219.83795166015625, "p10": -35.305498886108396, "median": 117.15364837646484, "p90": 223.98789672851566, "max": 497.58746337890625, "pos_frac": 0.8125, "sample": [36.30218505859375, 257.4239196777344, 92.41214752197266, 191.12838745117188, -146.65858459472656, 228.12893676757812, 402.8465576171875, 103.90605163574219, 131.17684936523438, 190.31414794921875, -120.19341278076172, -219.83795166015625, 97.83360290527344, 497.58746337890625, 54.3388671875, -125.99849700927734, 182.2754364013672, 197.23858642578125, -31.65578269958496, 169.7903289794922, 137.357177734375, 48.91334533691406, -79.98983764648438, 158.90283203125, 121.80946350097656, -14.811752319335938, 36.59843063354492, 354.8766784667969, 140.6378936767578, 274.40655517578125, 43.782379150390625, 65.6637191772461, 54.58259582519531, -32.6102409362793, 97.64442443847656, 84.78336334228516, 79.5523681640625, 31.2598876953125, 188.71145629882812, 173.5326690673828, 129.01998901367188, 153.1464385986328, 86.15123748779297, 190.13525390625, 195.620361328125, 149.65045166015625, 177.42333984375, 75.1451644897461, -36.460609436035156, -25.43082618713379, 38.38844299316406, 127.05918884277344, 9.351303100585938, 238.12945556640625, -94.672119140625, 18.10529327392578, 112.89385986328125, 121.41343688964844, 210.48167419433594, 214.32546997070312, 173.75035095214844, -15.202133178710938, 189.9759063720703, 145.34085083007812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000262.npy"}
|
|
{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 153.16665649414062, "std": 146.68362426757812, "min": -222.1808319091797, "p10": -9.533815765380844, "median": 141.90318298339844, "p90": 362.71106262207036, "max": 449.95147705078125, "pos_frac": 0.890625, "sample": [233.83578491210938, -222.1808319091797, 162.22006225585938, 120.65837097167969, 282.00311279296875, 35.41956329345703, 302.58648681640625, 75.57833862304688, 16.776321411132812, 134.20407104492188, -112.7373046875, 144.69862365722656, 63.73206329345703, 235.1688690185547, 241.8192138671875, 139.1077423095703, 16.512523651123047, 241.77503967285156, 103.62183380126953, 201.10702514648438, 6.513723373413086, 9.66131591796875, 81.1637954711914, 293.2123107910156, -49.09831619262695, -23.716705322265625, 199.74554443359375, 128.1846160888672, 6.462593078613281, 87.18838500976562, 66.35847473144531, 41.485069274902344, 365.7907409667969, 68.50889587402344, 43.34490966796875, 355.525146484375, 270.52532958984375, 103.59994506835938, 340.2809143066406, 218.3094482421875, 68.29341888427734, 386.8956298828125, 100.64762878417969, 263.8692321777344, 340.5874938964844, 155.86093139648438, 429.10498046875, 8.086074829101562, 205.0718536376953, 244.2052001953125, 239.35885620117188, 377.9928283691406, 449.95147705078125, -117.2620849609375, -16.389419555664062, 407.58203125, 10.601951599121094, -22.59119415283203, 193.7609100341797, 197.20269775390625, 155.75389099121094, 447.8950500488281, 146.06410217285156, 101.16983032226562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000263.npy"}
|
|
{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 126.249755859375, "std": 139.1712188720703, "min": -206.36502075195312, "p10": -12.08830833435058, "median": 101.19155502319336, "p90": 322.1023162841797, "max": 494.9957275390625, "pos_frac": 0.84375, "sample": [194.85565185546875, 282.87530517578125, 50.3373908996582, 411.926025390625, 111.51748657226562, -146.56866455078125, -14.281837463378906, 348.2739562988281, 93.6669921875, 319.8660888671875, 25.717506408691406, 46.39934539794922, 101.6532974243164, -5.789360046386719, 129.39691162109375, 98.52144622802734, 230.90396118164062, 264.1424865722656, 292.8233337402344, 222.2901153564453, 109.3748779296875, 100.72981262207031, 139.950927734375, 155.9536590576172, 58.47218322753906, 65.40377807617188, -6.400352478027344, 50.15089416503906, 53.13910675048828, 121.24540710449219, -18.564348220825195, 22.90143585205078, 65.87288665771484, 373.3101806640625, 3.7379379272460938, 413.1008605957031, 104.09500885009766, 128.3690185546875, 55.72987365722656, 201.10304260253906, 98.10061645507812, 120.51034545898438, 88.04293823242188, 49.254913330078125, 42.591773986816406, 494.9957275390625, 255.87493896484375, -206.36502075195312, 91.05179595947266, -92.5586929321289, 213.0074920654297, 41.7047004699707, 178.57537841796875, 400.1900634765625, -38.08258056640625, 103.97940063476562, 72.3743667602539, 131.30517578125, -92.93095397949219, 323.0606994628906, 284.7892150878906, 220.1581573486328, -6.970073699951172, 51.12016296386719], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000264.npy"}
|
|
{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 125.38917541503906, "std": 177.45335388183594, "min": -336.17498779296875, "p10": -42.056617355346674, "median": 114.62328338623047, "p90": 356.66972656250005, "max": 622.8463134765625, "pos_frac": 0.78125, "sample": [163.828125, 333.4910583496094, 298.9273681640625, 200.03334045410156, 100.64469146728516, 117.40019226074219, 250.53314208984375, 6.405082702636719, 496.0342712402344, 91.23443603515625, 221.15493774414062, -160.8834228515625, 8.09323501586914, -36.55020523071289, -16.35614013671875, 23.620136260986328, 165.81661987304688, -80.09764099121094, 98.47283172607422, -47.657562255859375, 231.99130249023438, 622.8463134765625, -336.17498779296875, 411.80889892578125, -28.388427734375, 159.13650512695312, 77.72312927246094, 389.7824401855469, -38.81278610229492, 604.2838134765625, -219.7439422607422, 184.29876708984375, 43.575653076171875, -23.58773422241211, 9.79046630859375, -23.802894592285156, 25.131240844726562, 38.554229736328125, 284.3061218261719, 29.514846801757812, 373.5197448730469, 69.99005126953125, 55.348785400390625, 296.1903991699219, -43.44683074951172, 111.84637451171875, 7.76318359375, 140.70028686523438, -12.326065063476562, 173.97817993164062, 63.276756286621094, 123.3331298828125, 183.27798461914062, 359.7781982421875, 125.731201171875, 129.4897918701172, 184.9465789794922, 162.2931365966797, 119.45651245117188, -152.6075897216797, 349.4166259765625, 72.92611694335938, 263.7479248046875, 189.8997802734375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000265.npy"}
|
|
{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 114.81865692138672, "std": 148.8333740234375, "min": -192.88885498046875, "p10": -39.92065048217773, "median": 74.36821746826172, "p90": 333.7099731445313, "max": 485.0703430175781, "pos_frac": 0.796875, "sample": [29.798141479492188, 453.8082275390625, 65.93041229248047, 79.17342376708984, 179.6758270263672, 156.5710906982422, 64.05177307128906, 263.4580078125, 148.86837768554688, 339.6474609375, 132.8179473876953, 291.0599365234375, -25.018234252929688, 282.0101013183594, -96.97080993652344, 48.37651824951172, 287.4748840332031, 93.81771850585938, 158.19210815429688, 21.98418426513672, 97.66401672363281, -192.88885498046875, -35.43683624267578, 351.47625732421875, 175.5862579345703, 297.0390319824219, 64.07078552246094, 21.803504943847656, 46.59864044189453, 352.1761169433594, 419.1709899902344, 190.93023681640625, 485.0703430175781, 319.8558349609375, 175.66390991210938, -22.844696044921875, 124.98707580566406, 231.8319091796875, 35.92804718017578, 122.09947967529297, 62.299591064453125, 101.30459594726562, 69.5630111694336, 42.97486114501953, 113.9532470703125, 1.5438117980957031, -135.23672485351562, -141.94036865234375, 194.3630828857422, 45.30199432373047, -41.84228515625, -66.99578094482422, 31.343704223632812, 65.220458984375, -14.121482849121094, -15.111335754394531, -10.054376602172852, 26.62427520751953, -92.0488052368164, 392.5511169433594, 211.82192993164062, 200.65325927734375, 4.905851364135742, 65.81153869628906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000266.npy"}
|
|
{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 127.50728607177734, "std": 147.1898956298828, "min": -344.0007019042969, "p10": -12.035635375976552, "median": 123.72353744506836, "p90": 296.9936920166016, "max": 517.26611328125, "pos_frac": 0.875, "sample": [78.62877655029297, 170.72622680664062, 222.7967987060547, 13.819133758544922, 121.78700256347656, -29.40290641784668, 207.85971069335938, 196.78500366210938, -16.483184814453125, 42.06044387817383, 36.0665283203125, -1.65802001953125, -81.09518432617188, 298.8248291015625, 136.90234375, 145.55987548828125, 34.009849548339844, 197.51950073242188, 386.1471862792969, 496.00360107421875, 86.86985778808594, 86.75684356689453, 126.0911636352539, -17.461334228515625, -28.195022583007812, 94.101806640625, 72.98371887207031, 66.07898712158203, 238.3589630126953, 174.7659149169922, 372.71307373046875, 334.3891296386719, 517.26611328125, 84.41363525390625, 1.8218498229980469, 211.80007934570312, 40.47153854370117, 18.278099060058594, 29.56179428100586, 157.21798706054688, 254.00485229492188, 11.244430541992188, 0.44582366943359375, 468.3892517089844, -344.0007019042969, 4.427345275878906, -188.26283264160156, 182.60818481445312, 138.0718994140625, 120.0836181640625, 148.76670837402344, 183.34756469726562, 36.922603607177734, 279.17706298828125, 125.66007232666016, 143.59228515625, 38.02552795410156, 292.7210388183594, 67.66068267822266, 130.89651489257812, 231.08734130859375, 150.0268096923828, 244.17706298828125, 116.25150299072266], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000267.npy"}
|
|
{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 116.30879974365234, "std": 154.71170043945312, "min": -260.5708312988281, "p10": -44.887448120117185, "median": 117.85405349731445, "p90": 292.3899139404297, "max": 583.3814697265625, "pos_frac": 0.796875, "sample": [20.959144592285156, 320.36962890625, 119.86797332763672, -46.391334533691406, 52.49180603027344, 76.46053314208984, 144.5332794189453, 110.79737854003906, 294.8503112792969, 143.75650024414062, 167.28262329101562, -189.69802856445312, 150.0020751953125, 20.283004760742188, 239.94830322265625, -78.633056640625, 78.72554779052734, 60.37420654296875, -180.93096923828125, 134.29859924316406, 133.24920654296875, 583.3814697265625, 32.38523483276367, 171.70018005371094, 61.45917892456055, 184.27867126464844, 286.64898681640625, 448.30279541015625, -29.367950439453125, 260.3501892089844, -90.86356353759766, 37.051849365234375, 16.527923583984375, -5.195728302001953, 445.64263916015625, 98.49072265625, 125.48062896728516, 157.6653289794922, 170.2200927734375, 216.4846954345703, 160.7028350830078, 207.13897705078125, 9.144203186035156, -53.54901123046875, 219.5375213623047, 69.7239990234375, 188.2753143310547, 321.0895080566406, -26.613845825195312, 160.51565551757812, 139.53350830078125, 115.84013366699219, 534.259765625, 182.04513549804688, 78.847900390625, -9.623825073242188, 73.3735580444336, -41.378379821777344, 226.683837890625, 1.3631458282470703, 107.6826171875, -260.5708312988281, -36.78120803833008, 133.2823944091797], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000268.npy"}
|
|
{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 155.22140502929688, "std": 174.2900848388672, "min": -276.107421875, "p10": -46.0761058807373, "median": 157.03008270263672, "p90": 381.0892333984375, "max": 552.7161254882812, "pos_frac": 0.8125, "sample": [135.3387908935547, 335.4925231933594, 192.1784210205078, 211.08363342285156, -48.19109344482422, 213.23851013183594, 301.24798583984375, 157.95281982421875, 22.342117309570312, -47.571861267089844, 239.12977600097656, -10.513923645019531, 380.28173828125, 61.44584655761719, 352.22198486328125, 171.87005615234375, 6.911033630371094, 128.8006134033203, 13.672119140625, -276.107421875, 247.1617431640625, 25.571460723876953, 204.6396484375, -191.05551147460938, -30.975879669189453, 328.34332275390625, 9.466850280761719, 456.5202941894531, 445.01812744140625, 113.705322265625, 170.22793579101562, 41.82965087890625, 381.435302734375, 291.20648193359375, 52.79414367675781, 286.509765625, 321.6369323730469, 95.61066436767578, 278.7305908203125, 0.13525390625, -0.06836700439453125, 156.1073455810547, -106.24876403808594, 11.271331787109375, 138.69285583496094, 422.97528076171875, 228.70623779296875, 374.46923828125, 291.4063415527344, 46.4925537109375, 240.07122802734375, 155.18914794921875, 53.708160400390625, 552.7161254882812, -69.18341064453125, 403.3717041015625, -0.43133544921875, 35.45336151123047, -42.58600997924805, 174.9666748046875, 361.2596130371094, 386.1211853027344, -155.0099334716797, 205.38314819335938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000269.npy"}
|
|
{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 110.53462982177734, "std": 154.56655883789062, "min": -287.5572204589844, "p10": -55.1560920715332, "median": 98.63055419921875, "p90": 322.89771728515626, "max": 548.01123046875, "pos_frac": 0.78125, "sample": [243.15664672851562, 267.187255859375, 349.1520080566406, -240.04031372070312, -287.5572204589844, 11.346664428710938, 36.01085662841797, 210.29922485351562, 127.04389190673828, 241.23373413085938, 324.6907958984375, -24.6168212890625, 68.97461700439453, 160.828369140625, 21.2606201171875, 30.68462371826172, 103.30562591552734, 168.92361450195312, -69.26065826416016, 142.57058715820312, 281.18524169921875, 115.81712341308594, -25.98412322998047, 216.16030883789062, 347.65777587890625, 198.5755615234375, 7.179616928100586, 82.88031768798828, 149.42172241210938, 93.95548248291016, -165.86546325683594, 359.146728515625, 548.01123046875, -85.04463195800781, -56.29850769042969, 189.90924072265625, 306.949462890625, 55.345130920410156, 82.47105407714844, 30.568126678466797, 417.2184753417969, -98.16500854492188, 155.72634887695312, 10.549388885498047, 113.49209594726562, 126.13797760009766, 18.200698852539062, 216.35516357421875, -34.32398986816406, 12.074085235595703, 150.591552734375, 203.41537475585938, 79.71290588378906, -17.024110794067383, 29.082931518554688, 59.410308837890625, -24.2923583984375, -1.807373046875, -52.490455627441406, 337.39129638671875, 151.96292114257812, 199.24728393554688, 85.80140686035156, 318.7138671875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000270.npy"}
|
|
{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 137.3036346435547, "std": 141.5314483642578, "min": -77.5377197265625, "p10": -4.277227783203124, "median": 96.61536026000977, "p90": 350.48298034667977, "max": 549.4757080078125, "pos_frac": 0.859375, "sample": [549.4757080078125, 299.08502197265625, 120.3088607788086, -7.457668304443359, 122.89987182617188, -4.694889068603516, 274.86260986328125, 300.9900817871094, 71.0905532836914, 62.11638641357422, 226.903564453125, -77.5377197265625, 367.19573974609375, -60.281044006347656, 92.32784271240234, 19.67462921142578, 139.61947631835938, 38.176734924316406, 110.34938049316406, -2.4445037841796875, -49.66791534423828, 62.578460693359375, 363.07147216796875, 314.94683837890625, 418.0634765625, 300.2646484375, 56.357444763183594, 12.36314582824707, 84.68898010253906, 156.9366455078125, -3.302684783935547, 79.97505187988281, 109.39923095703125, 9.145347595214844, -56.466583251953125, 100.90287780761719, 34.28541564941406, 38.98869323730469, 246.49029541015625, 57.71554183959961, 17.915611267089844, 444.45074462890625, 61.101722717285156, 18.333662033081055, 106.63311767578125, 43.21897506713867, 84.11328125, 111.53092956542969, 273.4876403808594, 129.04043579101562, 423.913330078125, 223.09286499023438, 54.567054748535156, 19.290298461914062, 358.20819091796875, -39.704429626464844, 204.06973266601562, 332.4574890136719, 87.69894409179688, 246.16148376464844, 63.948814392089844, 245.43182373046875, 146.2807159423828, 152.7925262451172], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000271.npy"}
|
|
{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 159.61441040039062, "std": 152.75338745117188, "min": -186.2428436279297, "p10": -33.92111358642578, "median": 162.31664276123047, "p90": 360.4158386230469, "max": 539.6065673828125, "pos_frac": 0.828125, "sample": [213.28306579589844, 166.38409423828125, 4.652082443237305, -55.91883850097656, -34.07145690917969, 362.05010986328125, 168.9506378173828, 374.9307861328125, 340.937744140625, 219.03562927246094, 287.596435546875, 320.4549255371094, -60.02081298828125, 106.20608520507812, -186.2428436279297, 83.34455871582031, 48.08698272705078, 162.43235778808594, 135.87188720703125, 121.8326644897461, -33.5703125, 286.6373291015625, 240.39395141601562, 162.200927734375, 220.23031616210938, 50.31848907470703, 0.5382423400878906, 260.94671630859375, 119.89762115478516, 515.19775390625, 119.32392883300781, 155.6258087158203, 178.5174102783203, 119.87820434570312, 48.731170654296875, 276.62554931640625, 376.3450622558594, -169.0414581298828, 196.2646484375, 160.11459350585938, 539.6065673828125, 54.594451904296875, 398.6458740234375, 136.12135314941406, 192.2100830078125, -61.705360412597656, 239.3992919921875, 57.226558685302734, 223.6374969482422, 297.9094543457031, -24.661422729492188, 168.98086547851562, -0.3044586181640625, -57.34954833984375, -24.73387336730957, 247.28016662597656, 216.9516143798828, 79.4468765258789, 356.6025390625, 90.95069885253906, 181.93734741210938, 312.5799255371094, 421.26275634765625, 103.76061248779297], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000272.npy"}
|
|
{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 161.66452026367188, "std": 157.57662963867188, "min": -183.3116912841797, "p10": -28.217825317382793, "median": 146.10533142089844, "p90": 340.9926574707032, "max": 549.1689453125, "pos_frac": 0.875, "sample": [187.48922729492188, 276.21209716796875, 71.10507202148438, 121.70454406738281, 474.87652587890625, 424.21435546875, 149.83200073242188, -61.45238494873047, 483.0494384765625, -58.81193542480469, 317.7309875488281, 135.31887817382812, 24.676239013671875, 62.30498504638672, 8.238008499145508, 145.95367431640625, 16.791439056396484, 179.6466064453125, 186.65444946289062, 308.7965087890625, 193.73487854003906, 45.330841064453125, 60.41827392578125, 128.01329040527344, 81.35551452636719, 97.1597671508789, 301.55877685546875, 107.33737182617188, 146.25698852539062, 172.40252685546875, 222.7657012939453, 5.483497619628906, 270.3258361816406, 345.3444519042969, 6.48249626159668, 469.4454040527344, 487.9033203125, 182.9521026611328, 148.24453735351562, 242.8516387939453, -131.73587036132812, -35.78617858886719, 208.1315155029297, -64.5658187866211, 213.58807373046875, 304.5401611328125, 308.7645263671875, 43.66313934326172, 84.5850830078125, -183.3116912841797, 549.1689453125, 144.00210571289062, 105.98306274414062, -40.3560791015625, 85.339111328125, 279.6924133300781, 95.75221252441406, 73.83283996582031, 170.58868408203125, -10.558334350585938, 294.4925537109375, 24.288734436035156, 330.8384704589844, 325.8933410644531], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000273.npy"}
|
|
{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 128.41639709472656, "std": 154.07984924316406, "min": -173.99754333496094, "p10": -66.7465835571289, "median": 117.88253021240234, "p90": 355.97175598144537, "max": 482.01531982421875, "pos_frac": 0.765625, "sample": [108.77945709228516, 75.28668212890625, 30.965919494628906, -122.29658508300781, 223.7428436279297, 146.20989990234375, 159.37843322753906, -16.55719757080078, 92.1926498413086, 22.10198974609375, -98.423828125, 346.0309143066406, 248.2452850341797, 195.86093139648438, 236.7900390625, 131.92770385742188, 76.7635726928711, 416.09930419921875, -173.99754333496094, 274.119140625, -66.09126281738281, 167.63294982910156, -43.841827392578125, 104.24589538574219, -97.92696380615234, 267.26177978515625, 360.8138122558594, -54.8857421875, 114.88636016845703, 482.01531982421875, -30.455486297607422, 93.23330688476562, 223.24957275390625, 120.87870025634766, -5.303625106811523, 224.9142303466797, 396.4441833496094, -4.963104248046875, 221.1015167236328, 88.54289245605469, 241.79342651367188, 46.39421081542969, 144.3795623779297, 215.4175262451172, 290.82373046875, -62.19676971435547, 111.22636413574219, 1.3961238861083984, 151.68878173828125, 443.2135925292969, 267.9823303222656, 89.14595031738281, 8.185811996459961, 169.5072021484375, -115.67646026611328, 229.64622497558594, -67.02743530273438, -108.89535522460938, 392.61248779296875, 164.80783081054688, 97.07124328613281, 360.23211669921875, 0.582611083984375, 211.3656005859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000274.npy"}
|
|
{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 128.49118041992188, "std": 159.57615661621094, "min": -181.63348388671875, "p10": -66.68112564086913, "median": 99.00883102416992, "p90": 338.96343688964845, "max": 484.08355712890625, "pos_frac": 0.765625, "sample": [184.18687438964844, 66.84428405761719, 238.7659454345703, 16.159061431884766, 28.291967391967773, -55.93329620361328, 318.6396484375, 60.27201843261719, 377.7821960449219, -107.95272064208984, 257.5901184082031, 339.7390441894531, 449.74041748046875, 73.35028839111328, 150.87924194335938, 204.08045959472656, -46.00369644165039, 20.959609985351562, 99.32740020751953, 146.2821807861328, -85.72600555419922, 194.53131103515625, 401.0121154785156, -24.273338317871094, 83.6318130493164, -170.72299194335938, 192.84237670898438, 161.91830444335938, 198.90554809570312, 66.35076141357422, -81.21318817138672, 47.788238525390625, 243.09642028808594, 180.715087890625, 53.848785400390625, -5.076019287109375, -181.63348388671875, 37.99578857421875, -26.757535934448242, 19.129531860351562, 57.635345458984375, 312.8272705078125, -161.05679321289062, 87.07283020019531, 207.7060546875, 353.9077453613281, 174.77725219726562, 484.08355712890625, 215.43289184570312, 394.959716796875, 333.697998046875, -71.28733825683594, 216.5010986328125, 337.1536865234375, 10.310707092285156, 300.2750549316406, 98.69026184082031, 308.86968994140625, -38.279563903808594, 184.83135986328125, 42.590538024902344, 264.3878479003906, -4.79241943359375, -16.22412109375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000275.npy"}
|
|
{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 139.608154296875, "std": 164.06451416015625, "min": -166.99282836914062, "p10": -39.74769744873046, "median": 132.13843536376953, "p90": 376.3207916259766, "max": 626.9486694335938, "pos_frac": 0.828125, "sample": [107.01822662353516, 626.9486694335938, 71.40650939941406, 454.86627197265625, 428.0560302734375, 45.397377014160156, 7.109283447265625, 448.0932922363281, 18.036359786987305, 187.99420166015625, 154.54605102539062, 37.37708282470703, -41.344573974609375, 253.8814697265625, 276.4466552734375, 229.43502807617188, 172.20394897460938, -155.9354248046875, 94.03158569335938, 41.17900466918945, 30.184947967529297, 224.79917907714844, 156.71348571777344, 307.96844482421875, 29.941940307617188, -166.99282836914062, -0.4853706359863281, 124.50736999511719, 164.14370727539062, 290.3062438964844, -36.02165222167969, 160.43569946289062, 66.03206634521484, 152.69483947753906, 253.76451110839844, 199.42984008789062, -78.80178833007812, -100.45811462402344, 6.850763320922852, 39.88247299194336, -87.37081909179688, -16.243194580078125, 8.269664764404297, 377.8240051269531, 80.00287628173828, 80.88880157470703, 143.29469299316406, 192.9801788330078, 291.927490234375, 35.044105529785156, 372.81329345703125, -2.2513885498046875, 193.9816131591797, 206.85769653320312, 69.87393188476562, 225.71595764160156, 182.01524353027344, 139.76950073242188, 293.1031494140625, 55.732452392578125, -117.03988647460938, 501.93756103515625, 34.3504638671875, 389.80194091796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000276.npy"}
|
|
{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 139.88525390625, "std": 151.08511352539062, "min": -157.57057189941406, "p10": -31.28684768676757, "median": 102.29159164428711, "p90": 368.9475341796875, "max": 479.375244140625, "pos_frac": 0.84375, "sample": [1.6589927673339844, 256.0301513671875, -21.102951049804688, 85.08099365234375, 69.41400146484375, 221.52813720703125, 249.43399047851562, 115.87857055664062, -4.2489471435546875, 177.97592163085938, 155.1661376953125, 5.563287734985352, 266.40277099609375, -35.65137481689453, 257.8935852050781, 257.61480712890625, 39.866844177246094, 89.90122985839844, 200.85919189453125, 218.5369415283203, -77.86717987060547, 300.3738708496094, 74.55137634277344, 86.1458511352539, -111.05313873291016, 4.988014221191406, 479.375244140625, 95.13540649414062, 18.77716064453125, 102.755859375, -157.57057189941406, 403.1248779296875, 90.44881439208984, 260.1309509277344, -50.14629364013672, -93.06635284423828, 470.36236572265625, 271.9832458496094, 145.21023559570312, 49.196380615234375, 156.33853149414062, 50.64356994628906, 459.26275634765625, 69.88673400878906, 9.018157958984375, 93.02338409423828, 109.64815521240234, 368.82781982421875, 361.0649108886719, 368.99884033203125, -93.90243530273438, 244.85858154296875, 42.62079620361328, 82.22150421142578, 12.054534912109375, -5.869060516357422, 203.4827423095703, 101.82732391357422, 370.2069396972656, 379.9972839355469, 106.04145812988281, 171.76715087890625, 63.852272033691406, 256.1259460449219], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000277.npy"}
|
|
{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 149.4520263671875, "std": 159.49972534179688, "min": -128.9244384765625, "p10": -26.52446384429931, "median": 126.3302116394043, "p90": 400.14676818847664, "max": 524.1867065429688, "pos_frac": 0.828125, "sample": [-4.97913932800293, 290.16351318359375, -32.384605407714844, 19.652027130126953, 218.5877685546875, 256.3135986328125, 136.40371704101562, 27.692363739013672, 181.83668518066406, 103.76447296142578, 144.06365966796875, 150.5145721435547, 53.104774475097656, 2.023834228515625, 90.87039184570312, 201.90972900390625, -33.209320068359375, 288.2919921875, 368.1027526855469, 3.9391403198242188, 22.59510612487793, 524.1867065429688, 327.43109130859375, 157.24160766601562, 206.6990966796875, 253.36827087402344, 65.17203521728516, -6.861457824707031, 116.25670623779297, 471.01959228515625, 407.4977111816406, -48.34413146972656, 90.4261474609375, -69.58438110351562, 517.7659301757812, 408.2686767578125, 30.15148162841797, 67.83041381835938, 52.350608825683594, 181.22769165039062, 77.28026580810547, 334.40704345703125, 153.12139892578125, 27.981338500976562, 38.7303466796875, 419.7965393066406, 97.15522003173828, 193.7990264892578, 3.1115684509277344, 382.99456787109375, 167.0474090576172, 158.59690856933594, -6.160186767578125, 84.5580062866211, 250.99996948242188, 276.10858154296875, -29.70719337463379, 8.355752944946289, 486.01190185546875, -71.24449157714844, 278.1665954589844, 140.48031616210938, -128.9244384765625, -19.098094940185547], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000278.npy"}
|
|
{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 157.31314086914062, "std": 182.27285766601562, "min": -245.39190673828125, "p10": -56.807560348510734, "median": 144.52376556396484, "p90": 413.8603790283204, "max": 685.8677978515625, "pos_frac": 0.828125, "sample": [83.89717102050781, 21.358850479125977, 31.590045928955078, 98.03018188476562, 12.310638427734375, -21.47283172607422, 428.3240966796875, 365.1921081542969, -46.38800811767578, 7.388568878173828, 9.829582214355469, 167.5493621826172, 152.0457763671875, 45.05680847167969, 421.7403869628906, 495.3739318847656, 42.8676872253418, 188.67745971679688, 238.5196075439453, 197.92245483398438, 545.9539794921875, 82.62068939208984, 23.831274032592773, -167.59310913085938, 35.41619873046875, 86.3754653930664, 57.011932373046875, 148.66705322265625, -28.647674560546875, 457.27288818359375, 154.482421875, 106.51860809326172, 208.83102416992188, 2.119363784790039, 208.32406616210938, 395.47369384765625, 197.0068359375, 685.8677978515625, 448.85858154296875, 247.1998748779297, -88.43562316894531, 45.9739990234375, -75.48162841796875, -245.39190673828125, 140.38047790527344, 372.9410095214844, 159.8450927734375, 276.8858947753906, 318.91351318359375, 137.13568115234375, 329.96820068359375, 283.71728515625, 162.7235107421875, -61.2730827331543, 366.8166198730469, 115.02922821044922, 352.95294189453125, 136.32139587402344, -94.51451873779297, -70.47911071777344, 260.45428466796875, 202.94418334960938, -3.3133277893066406, 208.52232360839844], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000279.npy"}
|
|
{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 160.44956970214844, "std": 143.2413330078125, "min": -137.36537170410156, "p10": -28.81217575073242, "median": 151.152099609375, "p90": 351.65450134277353, "max": 473.75299072265625, "pos_frac": 0.859375, "sample": [144.54087829589844, 219.36497497558594, 96.9531021118164, 225.0208282470703, 38.34971618652344, -58.30042266845703, 263.0856628417969, 367.81072998046875, 387.75738525390625, 37.147491455078125, 233.2866668701172, 39.94654083251953, 25.753734588623047, 268.0400390625, 358.21600341796875, 328.21649169921875, 87.77068328857422, 276.27886962890625, 140.98037719726562, 222.91259765625, 87.05531311035156, 473.75299072265625, 472.7481994628906, 162.83006286621094, 229.22836303710938, 132.359375, 278.34130859375, 264.03521728515625, 4.480499267578125, 254.73565673828125, 128.70681762695312, 159.67034912109375, 392.7188720703125, 119.2938232421875, -25.189208984375, 148.88113403320312, -73.6575927734375, 336.3443298339844, 185.6308135986328, 120.61101531982422, 192.09445190429688, 84.50591278076172, -137.36537170410156, 400.66888427734375, -83.22419738769531, -129.79287719726562, 125.0627212524414, 279.1288757324219, 153.42306518554688, 122.90471649169922, 195.6785430908203, 306.78131103515625, -2.9483203887939453, 85.62952423095703, 10.4432373046875, -30.36487579345703, 154.135986328125, 141.8592987060547, 95.02789306640625, 290.0916748046875, 228.4456787109375, -92.78998565673828, 69.76991271972656, 253.89695739746094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000280.npy"}
|
|
{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 146.1665496826172, "std": 143.05841064453125, "min": -404.5667419433594, "p10": -5.7428159713745, "median": 148.06478881835938, "p90": 321.8566864013673, "max": 455.16986083984375, "pos_frac": 0.890625, "sample": [66.32840728759766, 268.5121765136719, 140.21055603027344, 234.83311462402344, 105.12548828125, 80.88912963867188, 52.256622314453125, 193.4779052734375, 136.92872619628906, 56.33226776123047, 173.55145263671875, 281.1905517578125, 234.20835876464844, 30.844091415405273, 213.6693572998047, -62.565948486328125, 170.151611328125, 395.06097412109375, 69.68630981445312, 153.09185791015625, 35.146209716796875, 232.9557647705078, 237.69383239746094, 435.2370300292969, 143.0377197265625, 167.11074829101562, 193.71102905273438, 273.1846923828125, 19.737590789794922, 441.14697265625, 268.83740234375, 136.5235595703125, 184.427978515625, 361.34619140625, 220.4293212890625, -404.5667419433594, 107.02662658691406, 111.28843688964844, 158.30126953125, 62.635589599609375, 455.16986083984375, 36.77854919433594, 64.12928771972656, 210.1653289794922, 153.7037353515625, 240.64218139648438, 86.0906753540039, 167.61509704589844, 333.9154357910156, 105.60279083251953, -24.40502166748047, 110.6597671508789, -96.66700744628906, 47.362518310546875, -59.7689208984375, 5.971717834472656, -50.098419189453125, 293.7196044921875, 201.1156768798828, 49.807533264160156, 78.40279388427734, 210.72413635253906, -10.763330459594727, 365.7904357910156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000281.npy"}
|
|
{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 116.4205551147461, "std": 165.1771697998047, "min": -140.66683959960938, "p10": -66.47250518798828, "median": 103.60720825195312, "p90": 272.58636474609375, "max": 867.6843872070312, "pos_frac": 0.765625, "sample": [411.5652770996094, 126.84207153320312, 68.30953979492188, -109.84156036376953, 167.607666015625, 201.36248779296875, 80.95887756347656, 63.496429443359375, 106.13604736328125, 266.8983154296875, 255.62852478027344, 165.60455322265625, 248.34054565429688, 80.73784637451172, -25.735214233398438, 193.66195678710938, 867.6843872070312, 110.84754180908203, 121.46762084960938, 90.39163208007812, 85.39605712890625, 229.73220825195312, 26.605709075927734, -71.84918212890625, 439.42364501953125, 395.9223327636719, 117.22517395019531, 170.4546356201172, 107.67996215820312, 414.85894775390625, 262.82366943359375, 123.22369384765625, 272.58636474609375, -16.727569580078125, 42.4879264831543, 122.70162963867188, -91.29411315917969, -59.00511932373047, 51.26727294921875, -73.78384399414062, -42.754150390625, 60.83526611328125, 116.1495590209961, 168.24588012695312, 229.0527801513672, 17.05004119873047, -140.66683959960938, 272.58636474609375, 159.61624145507812, 101.078369140625, -40.665611267089844, -56.66923522949219, 39.08253479003906, 49.11076736450195, 126.70991516113281, -34.25189208984375, 234.6795654296875, 15.45863151550293, -69.67281341552734, 298.8159484863281, -111.877197265625, 20.062068939208984, -57.383522033691406, 54.62907409667969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000282.npy"}
|
|
{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 171.64404296875, "std": 163.65830993652344, "min": -308.7904968261719, "p10": -27.49814796447753, "median": 163.06246185302734, "p90": 354.6368347167969, "max": 611.40478515625, "pos_frac": 0.828125, "sample": [-10.09494400024414, -12.821281433105469, 163.19664001464844, 38.346961975097656, 289.7106628417969, 330.57373046875, 40.0787353515625, 225.9993438720703, 333.30352783203125, 43.105369567871094, -4.308082580566406, 158.3086395263672, -16.413875579833984, 41.16297912597656, 70.81489562988281, 103.2159423828125, -308.7904968261719, 69.011474609375, 250.78189086914062, -32.24855041503906, 447.0552673339844, 354.874267578125, 135.10975646972656, 262.26287841796875, 115.84483337402344, 173.64849853515625, -37.07435607910156, 353.24285888671875, 279.14666748046875, 470.402587890625, 12.67706298828125, 267.5828552246094, -58.67460632324219, 162.92828369140625, 307.67205810546875, 70.18461608886719, 91.19479370117188, -45.67476272583008, 101.25486755371094, -53.313812255859375, 371.70513916015625, 222.6407012939453, 288.3702392578125, 135.880126953125, 285.3041687011719, 202.22171020507812, 98.1447982788086, 178.279052734375, 328.07293701171875, 238.24270629882812, 489.3789978027344, 92.17098236083984, 353.49432373046875, 132.72613525390625, 46.44578552246094, 297.79473876953125, 396.6573791503906, 194.6695098876953, 260.6912841796875, 611.40478515625, 354.08282470703125, 82.65316009521484, 173.318115234375, -32.409423828125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000283.npy"}
|
|
{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 162.70361328125, "std": 158.869384765625, "min": -166.00323486328125, "p10": -13.432388687133773, "median": 132.75334930419922, "p90": 361.44615783691427, "max": 645.385986328125, "pos_frac": 0.890625, "sample": [-51.683624267578125, 101.15867614746094, 234.07691955566406, 37.670230865478516, 139.60833740234375, 28.826597213745117, 307.0311279296875, 26.21380615234375, 252.36111450195312, 274.08709716796875, 301.0653076171875, -19.83975601196289, 381.6286926269531, 39.10718536376953, 227.84759521484375, 66.52020263671875, 47.538047790527344, 249.32879638671875, 280.18408203125, 445.7408142089844, 27.61041259765625, 504.0891418457031, 133.64019775390625, 294.34912109375, 145.33889770507812, 8.136734008789062, 131.8665008544922, 62.447689056396484, 229.29762268066406, -30.220458984375, 79.84710693359375, 581.3042602539062, 207.04193115234375, -46.83397674560547, 130.85598754882812, 108.28179931640625, -24.773597717285156, 110.5273666381836, 207.33872985839844, 645.385986328125, 108.50291442871094, 189.66647338867188, 137.88714599609375, 13.661409378051758, -23.086669921875, -166.00323486328125, 236.00314331054688, 314.35357666015625, 92.69944763183594, 207.59356689453125, 480.26318359375, 121.787353515625, 213.71719360351562, 211.85411071777344, 73.38630676269531, 5.353748321533203, 1.5181350708007812, 60.605857849121094, 257.237548828125, 242.07420349121094, 16.06549835205078, 398.69805908203125, 87.77318572998047, 257.4158630371094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000284.npy"}
|
|
{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 145.0532684326172, "std": 153.56582641601562, "min": -431.8997802734375, "p10": -35.92209815979004, "median": 161.87935638427734, "p90": 339.53879699707034, "max": 503.75213623046875, "pos_frac": 0.84375, "sample": [297.50115966796875, 233.17135620117188, 39.684417724609375, 330.61749267578125, 102.6740493774414, 99.39906311035156, 178.95880126953125, 72.86511993408203, 179.90921020507812, 382.6705322265625, 226.51039123535156, 184.64889526367188, 312.55682373046875, 118.56204986572266, 133.55665588378906, 503.75213623046875, 16.239349365234375, 33.80084228515625, 188.81124877929688, 212.834228515625, 359.8583679199219, -121.52118682861328, 70.13951873779297, 201.93991088867188, 348.7578430175781, 46.48596954345703, -17.897159576416016, 98.69024658203125, 85.30966186523438, 45.972816467285156, 209.92971801757812, 131.03878784179688, -49.493629455566406, 39.551082611083984, 236.45849609375, 343.3622131347656, 84.8367691040039, -431.8997802734375, 239.14694213867188, 52.905975341796875, 183.66387939453125, -2.349578857421875, 478.114013671875, 201.27792358398438, -44.273353576660156, 259.5366516113281, 207.546875, 156.684326171875, 241.52023315429688, 288.9133605957031, 99.501708984375, -119.02511596679688, 227.61172485351562, 173.16220092773438, -85.27418518066406, -37.04562759399414, 359.7579345703125, 67.81344604492188, 97.19344329833984, 167.0743865966797, -33.30052947998047, 260.94720458984375, 195.16586303710938, 116.89588165283203], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000285.npy"}
|
|
{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 164.3234405517578, "std": 178.20187377929688, "min": -232.59426879882812, "p10": -53.87557678222655, "median": 175.6259536743164, "p90": 421.2247314453125, "max": 564.7205810546875, "pos_frac": 0.78125, "sample": [77.1474380493164, -30.055259704589844, 488.3996276855469, 161.31515502929688, 174.56326293945312, 177.71923828125, 88.1867904663086, 177.228515625, 267.269775390625, -232.59426879882812, 237.66824340820312, 330.8383483886719, 2.2634735107421875, -28.487451553344727, 258.1974182128906, 180.97232055664062, 442.3029479980469, -36.18695068359375, 132.34548950195312, 176.6886444091797, 470.70703125, 73.5596923828125, -212.827392578125, 417.00225830078125, 269.92437744140625, -93.00537109375, -35.66630172729492, 324.25885009765625, 267.870849609375, 454.07635498046875, 398.54248046875, -69.20501708984375, 204.2812042236328, 89.17849731445312, 1.2724151611328125, 82.42097473144531, 308.68133544921875, 169.8999786376953, 107.19625854492188, -45.00291442871094, 239.8165740966797, -33.767784118652344, 188.57244873046875, -66.64832305908203, 47.30863952636719, 349.9842529296875, 423.03436279296875, 157.7030029296875, -96.93502044677734, 477.5243835449219, -33.993141174316406, 180.66415405273438, 81.13410949707031, 344.016845703125, 106.4578857421875, 290.90936279296875, 140.50347900390625, 202.41122436523438, 188.68133544921875, -57.67814636230469, 564.7205810546875, 306.722900390625, 205.1951904296875, 81.41373443603516], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000286.npy"}
|
|
{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 133.5565948486328, "std": 170.0500946044922, "min": -261.6867980957031, "p10": -56.04768753051757, "median": 121.81001663208008, "p90": 390.30400390625, "max": 665.804931640625, "pos_frac": 0.84375, "sample": [116.93510437011719, 67.3438720703125, 36.00359344482422, 126.68492889404297, 107.1889877319336, -21.311187744140625, 251.54730224609375, 490.1903076171875, 146.5060272216797, 128.6743927001953, 451.70135498046875, 108.88802337646484, 148.13648986816406, 442.8515625, -46.22575378417969, -76.62721252441406, 30.66924285888672, 391.02972412109375, 184.713134765625, 284.2874755859375, 144.85552978515625, 5.537502288818359, 165.88592529296875, -261.6867980957031, 665.804931640625, 69.13423919677734, 55.650543212890625, 15.51593017578125, 16.365127563476562, -60.25708770751953, 130.42868041992188, 35.351890563964844, -73.12637329101562, 145.70863342285156, 58.61600112915039, 240.55015563964844, 45.51513671875, 225.88670349121094, 92.72251892089844, 131.8486785888672, 40.36102294921875, 278.935302734375, 128.1507568359375, 106.55516052246094, 0.7770843505859375, 107.83090209960938, 61.60357666015625, 29.201004028320312, 388.61065673828125, 205.24444580078125, -154.3839111328125, 59.51085662841797, 193.42942810058594, 168.6397247314453, 405.6313171386719, -157.33920288085938, -156.75315856933594, -27.47612762451172, 175.00936889648438, 272.9723205566406, 434.25372314453125, 365.53521728515625, 166.51031494140625, 235.31674194335938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000287.npy"}
|
|
{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 166.12017822265625, "std": 158.62615966796875, "min": -268.471923828125, "p10": -7.952437591552707, "median": 145.9478530883789, "p90": 403.61925354003915, "max": 575.8711547851562, "pos_frac": 0.890625, "sample": [185.7037353515625, 133.10650634765625, -37.35329818725586, 214.6847686767578, 237.0286407470703, 325.0294189453125, 28.100492477416992, 575.8711547851562, 49.82470703125, 313.15203857421875, 207.204833984375, 130.99905395507812, -58.048500061035156, 19.346435546875, 55.57119369506836, 143.2791290283203, 120.84886169433594, 386.2256164550781, 150.4327392578125, 302.6084289550781, 148.6165771484375, 213.409423828125, 218.1705322265625, 25.08917236328125, 105.72578430175781, 199.45733642578125, 202.61480712890625, 57.52257537841797, 143.19210815429688, 55.85588073730469, 120.13652801513672, 162.4210205078125, 505.8858947753906, 190.7354736328125, 411.07366943359375, 293.36663818359375, 98.5621337890625, 100.92916870117188, 206.417236328125, 125.7474594116211, -268.471923828125, 468.0258483886719, 83.87277221679688, 207.9533233642578, 76.56715393066406, 176.67257690429688, 113.9394760131836, 51.975372314453125, 456.36175537109375, 129.74392700195312, 251.38931274414062, 433.55059814453125, 187.24859619140625, -187.75511169433594, 123.87315368652344, 422.61181640625, 215.8531494140625, 55.26927185058594, 68.14237976074219, 331.33477783203125, -19.651954650878906, 324.653076171875, -92.13951110839844, -47.873382568359375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000288.npy"}
|
|
{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 183.99862670898438, "std": 184.90504455566406, "min": -147.64987182617188, "p10": -17.36345596313476, "median": 123.7863883972168, "p90": 484.824969482422, "max": 573.1292114257812, "pos_frac": 0.84375, "sample": [174.43545532226562, -5.687469482421875, 534.3580322265625, 516.9903564453125, -147.64987182617188, 518.10302734375, 299.74139404296875, 237.7816162109375, 95.23016357421875, 505.8049621582031, 426.5179443359375, 347.83331298828125, 260.8642272949219, 463.46954345703125, 316.87176513671875, 284.5509033203125, 64.26237487792969, 573.1292114257812, -9.013481140136719, 276.41802978515625, 120.000244140625, 30.594205856323242, 109.40151977539062, -12.145915985107422, -51.464996337890625, 107.78662109375, 280.48876953125, 322.7132873535156, 49.689178466796875, 122.55241394042969, 329.9841613769531, 565.6943359375, 308.8852844238281, -19.599544525146484, 78.7634048461914, 19.354568481445312, 493.977294921875, 87.49876403808594, -38.281776428222656, 21.4295711517334, 93.38922119140625, 260.3927001953125, -45.27519226074219, 224.78182983398438, 125.0203628540039, 369.67059326171875, 392.9673156738281, 17.048952102661133, 104.30024719238281, -83.60069274902344, 78.50039672851562, 130.9775390625, 94.190673828125, 28.513792037963867, 435.263671875, 157.978759765625, 36.15925598144531, 60.55040740966797, 78.99822998046875, 217.68673706054688, 153.57601928710938, -143.67568969726562, 231.57522583007812, 95.5886459350586], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000289.npy"}
|
|
{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 183.73873901367188, "std": 213.23936462402344, "min": -248.312744140625, "p10": -30.444560241699218, "median": 167.92166137695312, "p90": 466.11512145996096, "max": 731.561279296875, "pos_frac": 0.828125, "sample": [430.43231201171875, 145.75765991210938, 137.292236328125, 174.22537231445312, -231.10861206054688, 300.79815673828125, 391.44049072265625, -30.874267578125, -29.441909790039062, 115.85153198242188, -129.5423583984375, 306.1281433105469, 171.3564453125, -10.546722412109375, 229.9107666015625, 215.15054321289062, 459.669921875, 14.2454833984375, 208.824951171875, 33.604251861572266, 64.76545715332031, 28.897384643554688, -116.473388671875, 149.1444091796875, 95.78012084960938, -248.312744140625, 104.06024169921875, 233.5146484375, 731.561279296875, 638.62158203125, 48.90882873535156, 221.24903869628906, 266.060546875, 589.5023193359375, 130.83245849609375, 27.987504959106445, 323.94610595703125, 110.19096374511719, 230.36471557617188, 164.48687744140625, 2.77520751953125, 203.81884765625, 194.6797637939453, 282.96514892578125, 27.501739501953125, 288.51519775390625, -17.503257751464844, -62.35530090332031, 286.5182800292969, -86.635009765625, 561.4842529296875, 384.68896484375, 220.05294799804688, 468.8773498535156, 34.424930572509766, 453.87298583984375, 495.638671875, 274.0962219238281, 3.385143280029297, 28.515779495239258, 54.02078628540039, 730.7586669921875, 259.21514892578125, -28.29513931274414], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000290.npy"}
|
|
{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 169.84747314453125, "std": 183.22950744628906, "min": -329.1833801269531, "p10": -51.39702606201171, "median": 166.60750579833984, "p90": 402.51853332519534, "max": 533.7692260742188, "pos_frac": 0.8125, "sample": [161.44285583496094, 163.34605407714844, 13.55621337890625, 45.857948303222656, -8.671890258789062, 280.0264892578125, 520.358154296875, 231.85284423828125, 360.822509765625, 271.64471435546875, 40.5667724609375, 405.81890869140625, 287.7239990234375, 9.699256896972656, 533.7692260742188, -32.482025146484375, 394.8176574707031, 199.599609375, -66.12259674072266, 187.17994689941406, 378.6410217285156, 16.742294311523438, 257.86395263671875, 66.40628051757812, 251.69065856933594, 356.0040283203125, 155.97531127929688, 211.0826873779297, 366.4546203613281, 17.435131072998047, 195.73236083984375, 155.77529907226562, 277.83770751953125, -130.39308166503906, 49.0419921875, -125.96142578125, -44.991539001464844, 76.5877914428711, -329.1833801269531, 412.2780456542969, 328.9478759765625, 417.385498046875, 317.9209289550781, 169.86895751953125, -58.427215576171875, -7.3335418701171875, -54.142234802246094, 329.2618408203125, 34.088226318359375, 133.93173217773438, 64.79664611816406, 114.6429443359375, 351.6459655761719, 220.0928955078125, -20.775604248046875, 200.60867309570312, 364.8211975097656, 88.33226013183594, 503.90936279296875, 15.096916198730469, -58.391258239746094, 359.4830322265625, 434.9210205078125, 3.7262001037597656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000291.npy"}
|
|
{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 171.28152465820312, "std": 207.59959411621094, "min": -567.1646728515625, "p10": -45.94459533691405, "median": 182.93144989013672, "p90": 431.43632812500005, "max": 623.2181396484375, "pos_frac": 0.8125, "sample": [189.78720092773438, 71.78024291992188, -9.786758422851562, 162.22259521484375, 230.23764038085938, 157.80941772460938, 120.4383544921875, 119.12654876708984, 218.82012939453125, -567.1646728515625, 576.3311767578125, 49.81401824951172, 506.0732421875, 111.59194946289062, 250.63912963867188, -51.08308410644531, -211.4017333984375, 416.20782470703125, 68.38166046142578, 328.6962890625, 3.7997512817382812, 574.1934204101562, 340.5395812988281, 205.97137451171875, 346.8623352050781, 468.0752258300781, 263.9866638183594, 345.83135986328125, 113.89263153076172, 86.1607666015625, -61.44533920288086, 216.57199096679688, 9.1174898147583, 368.6413879394531, -13.257789611816406, 111.67452239990234, -73.94972229003906, 177.4122772216797, 183.0532684326172, 90.37541198730469, 55.52442932128906, 437.96282958984375, 188.85833740234375, 220.5475311279297, 197.38209533691406, 201.88877868652344, 295.0237731933594, 318.1330261230469, 623.2181396484375, 27.239011764526367, 143.2437744140625, 207.597900390625, 254.2721710205078, -184.85008239746094, 211.14915466308594, -154.69183349609375, 595.4301147460938, -24.90310287475586, -26.20095443725586, 393.94482421875, 232.4163055419922, 182.80963134765625, 103.94917297363281, -33.95478820800781], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000292.npy"}
|
|
{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 142.2510986328125, "std": 197.51341247558594, "min": -226.0537872314453, "p10": -91.52276763916014, "median": 135.5185203552246, "p90": 390.7400207519531, "max": 722.4151611328125, "pos_frac": 0.734375, "sample": [34.23237991333008, 4.4942626953125, 32.671775817871094, 46.396217346191406, 243.29296875, 208.42201232910156, 395.2662658691406, 191.03515625, 390.06561279296875, 58.110939025878906, 383.9506530761719, -71.23890686035156, 231.63943481445312, -34.25045394897461, 722.4151611328125, -157.3227081298828, 148.95114135742188, 235.49652099609375, 65.89303588867188, 181.49929809570312, 298.5077209472656, -152.55886840820312, 226.26309204101562, -71.34588623046875, 31.096982955932617, 122.6345443725586, -77.14334106445312, 148.40249633789062, 30.731399536132812, 478.73834228515625, 391.029052734375, 309.22161865234375, 284.7608337402344, 309.6510009765625, 236.3368377685547, -97.68537902832031, 48.844451904296875, 182.10577392578125, 161.28561401367188, -28.42995262145996, -46.243385314941406, 73.58683013916016, -7.8287353515625, 237.58578491210938, 346.6849365234375, 344.08123779296875, 232.41656494140625, 282.11895751953125, 117.99964904785156, -185.33981323242188, 21.8985595703125, -226.0537872314453, -43.28166198730469, 435.78509521484375, 555.7470092773438, 60.64495086669922, -101.56201171875, 212.95692443847656, 57.02348709106445, -49.77978515625, -139.69076538085938, 356.4557189941406, 470.28369140625, -44.885948181152344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000293.npy"}
|
|
{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 154.00192260742188, "std": 192.43959045410156, "min": -208.82522583007812, "p10": -86.82061271667477, "median": 113.19638061523438, "p90": 400.260223388672, "max": 729.1492309570312, "pos_frac": 0.859375, "sample": [48.48786163330078, 58.5496826171875, 288.2471618652344, 376.84521484375, 101.003662109375, 41.50312423706055, -103.3151626586914, 113.70482635498047, 6.284843444824219, 138.88101196289062, 11.786758422851562, 20.5069580078125, -58.930171966552734, 457.69757080078125, 4.897422790527344, 209.65451049804688, 172.03660583496094, 356.67877197265625, 67.58743286132812, 149.3546905517578, 564.2808227539062, -122.18717956542969, 233.82635498046875, 94.27772521972656, 518.51123046875, 317.4933166503906, 286.24395751953125, 112.68793487548828, 150.37506103515625, -98.7736587524414, 169.97122192382812, 305.5774230957031, 273.3309326171875, 729.1492309570312, -194.20504760742188, -148.3139190673828, 534.3701782226562, 187.39773559570312, 312.5111999511719, 67.21845245361328, -208.82522583007812, -156.70108032226562, 282.45782470703125, 311.37518310546875, 495.9305419921875, 116.25222778320312, 43.21089553833008, 93.89248657226562, 410.29522705078125, 1.0417957305908203, 308.9959716796875, 288.0616760253906, 69.83312225341797, 150.36244201660156, 123.87294006347656, 27.4927978515625, 80.54450988769531, 75.42142486572266, -3.0225181579589844, 87.69618225097656, 348.90972900390625, 47.061737060546875, 27.6063232421875, 79.15125274658203], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000294.npy"}
|
|
{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 156.38516235351562, "std": 185.59471130371094, "min": -198.46702575683594, "p10": -51.81902275085449, "median": 145.01639556884766, "p90": 383.0644409179688, "max": 695.0789794921875, "pos_frac": 0.78125, "sample": [-178.929443359375, 44.00581359863281, 389.0269775390625, 5.375799179077148, -198.46702575683594, 205.9539794921875, -47.25468063354492, 272.30841064453125, 284.27532958984375, -168.89471435546875, 246.87338256835938, 3.3076324462890625, 144.95437622070312, 345.0638427734375, 326.1037902832031, 287.1378173828125, 254.4820098876953, 255.55984497070312, -41.12200164794922, 58.38043975830078, 310.26446533203125, -39.806663513183594, 106.986328125, 264.9417419433594, -20.14733123779297, 66.72859191894531, 410.2958679199219, -115.01419067382812, 474.82574462890625, 140.63861083984375, 295.6775207519531, 23.651351928710938, -21.625797271728516, -136.79339599609375, 369.15185546875, 499.11260986328125, 38.23430633544922, -19.066078186035156, -73.13484191894531, 145.0784149169922, 163.72377014160156, 283.2442626953125, 329.4713134765625, 39.13063049316406, -39.36829376220703, 61.42889404296875, 189.3647003173828, 86.04505157470703, 215.65084838867188, 478.0978088378906, 187.3758544921875, 222.52008056640625, 35.786956787109375, 695.0789794921875, 360.976806640625, 365.196533203125, 128.85787963867188, 182.6397705078125, 151.28311157226562, 137.3982696533203, 65.81585693359375, 91.31596374511719, 423.2501525878906, -53.775169372558594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000295.npy"}
|
|
{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 120.3759994506836, "std": 201.37908935546875, "min": -390.6088562011719, "p10": -104.34927902221679, "median": 111.09738540649414, "p90": 353.56329650878916, "max": 590.0101318359375, "pos_frac": 0.734375, "sample": [116.18389129638672, 89.73230743408203, 42.43145751953125, 587.4168090820312, -40.77191162109375, 331.0783386230469, -103.03917694091797, -31.566879272460938, -35.16382598876953, -386.4525146484375, 152.48007202148438, 140.880126953125, -278.224365234375, 232.3409423828125, 106.01087951660156, -187.73446655273438, 100.31706237792969, 275.79534912109375, 504.3928527832031, 314.23333740234375, 182.2064208984375, 197.14898681640625, -5.096839904785156, -29.98876953125, 69.29461669921875, 304.7412109375, 29.789642333984375, -108.81367492675781, 8.549072265625, -104.91075134277344, 446.53131103515625, 81.45619201660156, 220.6239776611328, 81.29454803466797, 102.2269287109375, 515.3447875976562, 192.3922576904297, 54.65456771850586, -390.6088562011719, 267.13372802734375, -197.25247192382812, 284.8342590332031, -70.96057891845703, -9.868776321411133, 407.356689453125, 102.23009490966797, 133.52923583984375, 189.44439697265625, 131.39227294921875, 86.59517669677734, 132.8443603515625, -10.635124206542969, 21.746803283691406, 590.0101318359375, 220.39784240722656, 252.89370727539062, 91.96106719970703, 158.467041015625, 363.19970703125, -67.25080871582031, 139.4279022216797, 210.7099609375, 186.7859344482422, 311.89569091796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000296.npy"}
|
|
{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 135.5083465576172, "std": 144.17984008789062, "min": -206.58929443359375, "p10": -11.107643890380858, "median": 109.0191879272461, "p90": 337.37298278808595, "max": 517.5526123046875, "pos_frac": 0.859375, "sample": [75.2579345703125, 130.06031799316406, 292.3166198730469, 461.4025573730469, 101.53312683105469, 6.916833877563477, 305.22381591796875, 160.62054443359375, -126.83805084228516, -63.56975173950195, 63.01605224609375, 140.31607055664062, 261.4598083496094, 223.73292541503906, 370.0228271484375, 29.510671615600586, 119.40496826171875, 68.98565673828125, 7.3946533203125, -12.281787872314453, 26.770492553710938, -45.51136779785156, 44.11668395996094, 35.28373718261719, 349.34759521484375, 46.899261474609375, 291.1021423339844, 65.551513671875, 199.590576171875, 225.2135009765625, 168.68841552734375, 101.50555419921875, 340.03558349609375, 189.3322296142578, 250.4356231689453, 111.7689208984375, 154.3345489501953, 27.698951721191406, 387.23876953125, 228.72097778320312, 27.601699829101562, 197.17359924316406, -175.3006591796875, 184.88291931152344, 74.09786224365234, -11.352119445800781, 168.7978057861328, 83.68161010742188, 517.5526123046875, -10.537200927734375, 95.99813842773438, 345.5510559082031, 188.98171997070312, -0.711029052734375, -206.58929443359375, 236.301513671875, 33.857154846191406, 58.07890319824219, 164.35501098632812, 331.1602478027344, 106.26945495605469, 305.73126220703125, 47.84381866455078, 96.49888610839844], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000297.npy"}
|
|
{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 170.81991577148438, "std": 212.45375061035156, "min": -293.27435302734375, "p10": -50.42251892089843, "median": 141.84296417236328, "p90": 462.1403045654298, "max": 899.1719360351562, "pos_frac": 0.828125, "sample": [86.57637786865234, -59.877532958984375, 38.099674224853516, -293.27435302734375, 26.806488037109375, -86.36761474609375, 1.6064624786376953, 537.2303466796875, 147.477783203125, 55.82415771484375, 140.7396240234375, 430.4146728515625, 22.480453491210938, 9.503860473632812, -59.6124267578125, 0.6960067749023438, 113.50157928466797, 144.75497436523438, 139.4991455078125, 180.71961975097656, 537.5922241210938, -9.924240112304688, 235.38722229003906, -170.13558959960938, 503.0911865234375, 244.21566772460938, -43.25486755371094, 433.767578125, 473.3832092285156, 542.0870361328125, -35.070579528808594, -39.025970458984375, -59.914573669433594, 4.2586669921875, 17.373695373535156, 435.9068603515625, 514.2035522460938, 172.0320587158203, 386.81427001953125, 240.41372680664062, 21.980497360229492, 2.6575164794921875, 899.1719360351562, 229.47784423828125, 430.915283203125, 163.41055297851562, 317.55230712890625, 376.7791442871094, 46.718814849853516, 142.94630432128906, 69.90109252929688, 355.65142822265625, 259.3017578125, 73.58362579345703, 363.99249267578125, 167.89291381835938, 25.534452438354492, 310.0549621582031, -53.49436950683594, 155.16989135742188, 128.54165649414062, 150.5016326904297, 53.87962341308594, 280.35302734375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000298.npy"}
|
|
{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 160.1863250732422, "std": 191.46786499023438, "min": -210.52084350585938, "p10": -24.910943222045898, "median": 123.31755447387695, "p90": 471.03910827636724, "max": 625.6597900390625, "pos_frac": 0.8125, "sample": [-85.29105377197266, -210.52084350585938, -111.01637268066406, 267.8553771972656, -15.38983154296875, 3.504762649536133, 320.3339538574219, 91.26629638671875, 55.37853240966797, 117.78746795654297, 546.7384033203125, 456.855712890625, 569.7762451171875, 27.785654067993164, 265.60308837890625, 625.6597900390625, -184.46072387695312, 329.8961486816406, 154.4669189453125, 190.72447204589844, -20.924367904663086, 272.61181640625, 68.4439468383789, 111.91973876953125, 221.43502807617188, 86.97366333007812, 621.7669067382812, 81.11996459960938, 104.19263458251953, -25.06485366821289, -23.820138931274414, 128.84764099121094, 18.790191650390625, 83.39659118652344, 184.66453552246094, 477.1177062988281, 87.86924743652344, 204.7519989013672, 24.7049560546875, 134.9893798828125, 98.1306381225586, 261.3762512207031, 291.15069580078125, -8.32101821899414, 211.83230590820312, 72.34917449951172, -189.6979522705078, 429.8786315917969, 136.82440185546875, 488.9858093261719, -60.049720764160156, 21.34064483642578, 204.63250732421875, 245.86807250976562, 167.12893676757812, 254.73239135742188, 91.63447570800781, 480.5179443359375, 87.76763916015625, 145.38339233398438, -24.55181884765625, 320.159423828125, 27.730567932128906, 236.38031005859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000299.npy"}
|
|
{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 185.663818359375, "std": 214.26132202148438, "min": -284.05108642578125, "p10": -123.26449890136716, "median": 210.7198028564453, "p90": 435.9978881835938, "max": 638.669921875, "pos_frac": 0.796875, "sample": [262.46429443359375, 254.33642578125, 183.1904296875, 245.3549041748047, 394.129638671875, -198.99465942382812, -58.83690643310547, 479.74481201171875, 359.2769775390625, -14.318363189697266, -137.30384826660156, 151.57870483398438, 243.97535705566406, 367.0833435058594, 201.93145751953125, 337.1972961425781, 80.53907775878906, 441.4248046875, 27.407644271850586, 88.0131607055664, 0.45660400390625, 195.16342163085938, 226.22894287109375, -1.8949966430664062, 340.1233215332031, 367.5660400390625, -93.80838012695312, 49.36122131347656, 250.5275421142578, 261.578125, 86.20091247558594, 565.8751220703125, 195.45541381835938, -268.3867492675781, 373.97747802734375, -212.80511474609375, 469.84710693359375, 226.15737915039062, 420.54473876953125, 204.2755126953125, 86.71228790283203, 379.57940673828125, 423.3350830078125, 264.5855407714844, 83.37911987304688, 302.5265808105469, 48.48289108276367, -91.94308471679688, 518.1331787109375, 331.9324035644531, 409.43267822265625, -284.05108642578125, 217.16409301757812, -179.26454162597656, 79.76138305664062, 140.35983276367188, 414.5770568847656, 59.29071044921875, 506.0798645019531, 638.669921875, -135.8885498046875, 83.61207580566406, 234.9476318359375, -13.568679809570312], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000300.npy"}
|
|
{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 179.134765625, "std": 195.379638671875, "min": -124.2564697265625, "p10": -26.858080482482897, "median": 122.06006240844727, "p90": 426.29668884277345, "max": 780.0227661132812, "pos_frac": 0.828125, "sample": [103.41184997558594, 311.76873779296875, -80.0394058227539, 338.6728820800781, 318.197509765625, 272.5623779296875, 562.519775390625, 111.43391418457031, 403.8145751953125, -15.279218673706055, 576.626220703125, 352.9410705566406, 112.2494888305664, 261.72509765625, -54.04206848144531, 182.33653259277344, 84.6106185913086, 454.9075927734375, 63.57084655761719, 276.69342041015625, 628.4874267578125, 22.721271514892578, 1.264150619506836, 39.21977233886719, 340.44354248046875, 94.79788208007812, 61.42400360107422, -7.8990325927734375, 234.63894653320312, 312.6493835449219, 263.16741943359375, 256.40185546875, 77.97393798828125, 153.84906005859375, 65.19017791748047, 55.102813720703125, 147.1700897216797, -31.820449829101562, 9.88340950012207, 63.781951904296875, 65.78472900390625, 42.46837615966797, 144.6295166015625, -8.920127868652344, 16.752822875976562, 780.0227661132812, 340.3634948730469, 131.87063598632812, 426.46337890625, 93.25814819335938, 425.9077453613281, 170.98809814453125, 274.7134704589844, 385.6144104003906, 304.68328857421875, 43.6905632019043, -124.2564697265625, -75.93095397949219, 501.0301818847656, -123.4248046875, 58.25587463378906, -83.09164428710938, -11.33065414428711, 257.95306396484375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000301.npy"}
|
|
{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 148.50027465820312, "std": 177.6729278564453, "min": -201.1050262451172, "p10": -59.62170257568358, "median": 115.22550582885742, "p90": 390.59251403808594, "max": 655.186767578125, "pos_frac": 0.796875, "sample": [36.16905212402344, 325.96514892578125, 468.14483642578125, -14.400875091552734, 156.62368774414062, 16.55131721496582, 275.3024597167969, 21.094940185546875, 125.57627868652344, 30.058868408203125, 276.3762512207031, 21.466079711914062, 116.60138702392578, 100.87601470947266, 115.4132080078125, 87.07546997070312, 320.05987548828125, -3.270641326904297, 289.57427978515625, 306.8803405761719, 212.8326873779297, 168.19338989257812, 148.0106201171875, 259.0443115234375, -76.71350860595703, -109.0405502319336, 289.0399169921875, 424.8872375488281, 77.22765350341797, -201.1050262451172, 204.17581176757812, -26.09967803955078, 171.40194702148438, -70.94020080566406, -66.84913635253906, 449.1535949707031, 655.186767578125, 387.6614990234375, 101.88426971435547, -42.7576904296875, -40.591552734375, 371.77947998046875, -199.55697631835938, 192.9698486328125, 372.95904541015625, 64.42000579833984, 69.26005554199219, 488.0322265625, 115.03780364990234, 227.80853271484375, -8.259847640991211, 186.32456970214844, -121.67908477783203, 217.86753845214844, 45.37910079956055, 179.98171997070312, 15.717422485351562, 391.8486633300781, 95.86329650878906, 477.41802978515625, 59.615264892578125, 95.11212158203125, 84.57965850830078, 94.79945373535156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000302.npy"}
|
|
{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 142.41439819335938, "std": 191.52066040039062, "min": -317.1259460449219, "p10": -75.89144287109374, "median": 122.83201217651367, "p90": 434.60995178222663, "max": 590.1939697265625, "pos_frac": 0.734375, "sample": [212.26123046875, -1.2466773986816406, 65.57167053222656, 512.1258544921875, -76.935546875, 270.4530029296875, 422.23004150390625, 548.1004028320312, -10.967909812927246, -94.70199584960938, 262.22918701171875, 159.914794921875, 304.3772888183594, -86.51556396484375, 20.68640899658203, 131.48056030273438, 53.38981628417969, -3.0499706268310547, 3.125223159790039, 348.57537841796875, -317.1259460449219, -3.357494354248047, -50.805931091308594, 270.7942199707031, 340.6028747558594, 454.39898681640625, -64.02005767822266, 231.0941925048828, -167.73907470703125, 115.30570983886719, -83.73818969726562, 244.55514526367188, 449.99395751953125, 179.02232360839844, 359.4658203125, 137.47384643554688, -139.865234375, 79.81771850585938, 95.91996002197266, 590.1939697265625, 422.5356140136719, 32.71478271484375, 162.90951538085938, 462.46466064453125, 85.70852661132812, 134.5255126953125, 202.24688720703125, 307.6949462890625, 126.49471282958984, -16.00348472595215, 119.1693115234375, 4.625297546386719, 199.4565887451172, 31.356006622314453, 207.0420379638672, 439.78466796875, -73.4552001953125, -32.52503967285156, 82.18006896972656, 62.57724380493164, 241.0780792236328, 16.104705810546875, -49.40015411376953, 182.14602661132812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000303.npy"}
|
|
{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 157.27410888671875, "std": 137.7813262939453, "min": -244.0098114013672, "p10": -3.327085876464841, "median": 164.92305755615234, "p90": 340.22007446289064, "max": 460.3510437011719, "pos_frac": 0.875, "sample": [-34.891021728515625, 190.9861297607422, 267.6383361816406, 87.04851531982422, -4.4825592041015625, -22.253555297851562, 38.531036376953125, 28.881668090820312, 121.73855590820312, 184.28988647460938, 263.3002014160156, 293.24371337890625, 133.81634521484375, 261.0985412597656, 109.48228454589844, 247.79879760742188, 252.830078125, 257.523193359375, 16.21605682373047, 105.96640014648438, 166.25396728515625, 335.775390625, 60.94752502441406, 55.08087158203125, 226.87301635742188, 136.0465087890625, 233.2074737548828, 67.384033203125, 297.96978759765625, 107.73478698730469, 114.81753540039062, 44.631683349609375, 429.03790283203125, 111.0961685180664, -65.81327056884766, -244.0098114013672, 168.40542602539062, -0.6309814453125, 84.95277404785156, 210.20370483398438, -51.027313232421875, 376.69573974609375, 50.28703308105469, 8.333372116088867, 342.12493896484375, 107.9883041381836, 8.912162780761719, 78.3958740234375, 240.08139038085938, 187.88027954101562, 259.1523742675781, -108.84526062011719, 163.59214782714844, 460.3510437011719, 346.47900390625, 95.71321105957031, 243.70864868164062, 205.36099243164062, 354.23919677734375, 383.08905029296875, 318.90838623046875, 237.45040893554688, 218.8455810546875, 199.09915161132812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000304.npy"}
|
|
{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 140.1407470703125, "std": 185.44822692871094, "min": -215.14117431640625, "p10": -65.17012329101563, "median": 102.47720718383789, "p90": 381.56073913574227, "max": 879.539794921875, "pos_frac": 0.78125, "sample": [329.5921936035156, 217.55474853515625, 95.09010314941406, 178.77490234375, 157.41702270507812, 533.6361694335938, 213.996826171875, 49.20466613769531, 223.1515655517578, 441.6195373535156, 74.86570739746094, 70.4487533569336, 115.70958709716797, -31.023651123046875, -82.0982666015625, 82.2959213256836, 225.21884155273438, 389.8567810058594, 66.09821319580078, 52.66722869873047, 879.539794921875, 269.8877258300781, 58.56801986694336, 227.2021026611328, 295.2196044921875, -64.63600158691406, 35.57306671142578, 225.15061950683594, 190.2327117919922, -87.39816284179688, -21.40021514892578, 283.35650634765625, -65.39903259277344, 254.02850341796875, 192.25979614257812, 119.607666015625, 337.7679748535156, 162.28277587890625, 8.043367385864258, 109.86431121826172, 85.0975570678711, -29.324974060058594, 92.49519348144531, 78.0345687866211, 434.97119140625, 428.6357421875, 209.0543670654297, -33.340057373046875, -215.14117431640625, 160.11666870117188, -131.49905395507812, 443.47003173828125, 362.20330810546875, 40.788780212402344, -67.21231079101562, 21.86962890625, -50.45014190673828, 25.363609313964844, 196.65118408203125, -126.24665069580078, 18.180763244628906, 212.79644775390625, -16.428726196289062, 15.094705581665039], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000305.npy"}
|
|
{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 137.0084991455078, "std": 159.19259643554688, "min": -254.8568115234375, "p10": -28.372297286987305, "median": 107.39479446411133, "p90": 309.96113281250007, "max": 701.151611328125, "pos_frac": 0.765625, "sample": [19.007587432861328, -44.90011215209961, 103.83111572265625, 32.826988220214844, -14.90924072265625, -7.9004058837890625, 271.8032531738281, 57.04547882080078, 66.57330322265625, -15.978462219238281, 171.9865264892578, 278.2685241699219, 117.69813537597656, 287.377685546875, 49.029327392578125, 81.33577728271484, -77.7593994140625, 394.7651062011719, 133.60427856445312, 39.58531188964844, 51.04711151123047, -36.502601623535156, -29.037628173828125, 295.7329406738281, 249.74996948242188, 225.98731994628906, -26.81985855102539, -5.045818328857422, 345.5405578613281, 162.74281311035156, 96.550048828125, 110.9584732055664, 249.40048217773438, 274.733642578125, 68.30789184570312, 210.2162322998047, 73.25143432617188, -73.12425994873047, 40.51308059692383, 228.71275329589844, -254.8568115234375, 282.73468017578125, 103.12299346923828, -71.29737854003906, 24.085466384887695, 253.75439453125, -24.22742462158203, 162.21737670898438, 491.2115173339844, 407.759521484375, 209.18067932128906, 222.64488220214844, 224.69639587402344, -23.04441261291504, 357.92205810546875, -4.621753692626953, 79.80909729003906, 32.29686737060547, 218.53335571289062, 130.65737915039062, 701.151611328125, 243.2913818359375, 229.25791931152344, 316.0589294433594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000306.npy"}
|
|
{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 139.84461975097656, "std": 163.13421630859375, "min": -323.2281188964844, "p10": -47.190644073486325, "median": 103.10649871826172, "p90": 363.5140777587891, "max": 540.6759643554688, "pos_frac": 0.859375, "sample": [295.4858093261719, 139.746826171875, 80.4481430053711, 207.90341186523438, 195.64468383789062, 123.50587463378906, -77.29779052734375, 58.960479736328125, -46.51557922363281, 451.2311706542969, 166.47769165039062, 30.224403381347656, 86.72744750976562, -97.27452087402344, 319.74615478515625, -323.2281188964844, 452.3637390136719, -98.31538391113281, 212.43450927734375, -47.479957580566406, 163.73892211914062, 231.9791259765625, 80.0896987915039, 159.13027954101562, 448.45013427734375, 179.30494689941406, 86.47804260253906, 43.261322021484375, 99.57791137695312, 386.0048522949219, 75.68108367919922, 363.9723815917969, 77.54659271240234, 81.86302185058594, -10.405380249023438, 91.47421264648438, 83.85037231445312, 66.93339538574219, 362.4447021484375, 318.2713623046875, 26.514198303222656, 269.1728515625, 26.608314514160156, 248.71609497070312, 196.2588653564453, 50.71098327636719, 190.05564880371094, 256.6920166015625, 9.571069717407227, 106.63508605957031, 183.50270080566406, 9.486263275146484, 319.142822265625, 458.8122863769531, 253.39593505859375, 540.6759643554688, 26.77007293701172, -67.87980651855469, 201.84104919433594, 37.114715576171875, 12.091384887695312, 202.06838989257812, 0.9250030517578125, -129.26246643066406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000307.npy"}
|
|
{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 156.81976318359375, "std": 153.76368713378906, "min": -165.61874389648438, "p10": -27.90379772186279, "median": 162.77597045898438, "p90": 373.5545043945313, "max": 474.1165466308594, "pos_frac": 0.859375, "sample": [179.68211364746094, 356.577880859375, -135.3575439453125, 218.16497802734375, 23.237106323242188, 415.81768798828125, -165.61874389648438, 9.076541900634766, 83.0531005859375, 75.78231048583984, 337.702880859375, 43.312442779541016, 179.7613983154297, 92.14004516601562, 250.02227783203125, 418.2004699707031, -110.35903930664062, 176.4330291748047, 131.1640625, 10.563850402832031, 171.4477996826172, 474.1165466308594, 301.671630859375, 380.8302001953125, -29.107635498046875, 214.40147399902344, 146.67440795898438, 71.87156677246094, 125.96621704101562, 12.071367263793945, 230.70635986328125, 131.82049560546875, 162.712890625, 239.8478546142578, 174.67604064941406, 342.57281494140625, 388.2467956542969, 111.81509399414062, -53.40214538574219, 35.05314636230469, 20.15284538269043, 283.60418701171875, 249.14999389648438, 131.23434448242188, 231.04039001464844, 459.1943054199219, 162.83905029296875, 73.47782897949219, 29.041893005371094, 238.56277465820312, 470.364013671875, 238.29641723632812, 165.9429931640625, 10.057992935180664, -25.0948429107666, 63.02854919433594, 314.4969482421875, 251.43826293945312, 77.96586608886719, -46.067405700683594, 330.3315734863281, 182.71945190429688, -79.78118896484375, -18.881019592285156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000308.npy"}
|
|
{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 159.3053741455078, "std": 172.63929748535156, "min": -303.7376708984375, "p10": -48.982390213012685, "median": 146.3067398071289, "p90": 376.29924621582035, "max": 566.580322265625, "pos_frac": 0.8125, "sample": [97.704345703125, 75.34260559082031, 76.48326873779297, -76.79483032226562, 239.95401000976562, 154.03182983398438, 108.77266693115234, -11.477800369262695, 425.8056640625, 35.71209716796875, 307.50128173828125, 384.20166015625, 123.78742218017578, 458.1992492675781, 115.38574981689453, 55.891563415527344, -53.52433395385742, 284.7713317871094, 352.9625549316406, 294.9894714355469, 218.0642547607422, -303.7376708984375, 220.42507934570312, -65.64280700683594, 113.80447387695312, 104.73885345458984, 247.74298095703125, 193.71511840820312, -5.4671478271484375, -38.384521484375, -87.3281021118164, 557.9610595703125, 71.09866333007812, 5.2986907958984375, 143.5601348876953, 367.306640625, 566.580322265625, 326.93829345703125, 111.58806610107422, -21.364547729492188, 37.22952651977539, 60.29591369628906, 85.98143005371094, 344.960205078125, 228.418701171875, 237.62884521484375, 377.87396240234375, 236.26380920410156, 234.4468231201172, 20.902019500732422, 338.3513488769531, -171.01025390625, 190.4490966796875, 249.6795654296875, -90.15240478515625, 52.73247146606445, 372.6249084472656, 383.02783203125, -34.00749206542969, 149.0533447265625, 172.38479614257812, 64.94552612304688, 302.26080322265625, 174.60536193847656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000309.npy"}
|
|
{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 151.4144744873047, "std": 144.39625549316406, "min": -159.55752563476562, "p10": -20.064884948730466, "median": 151.31673431396484, "p90": 351.641961669922, "max": 482.56781005859375, "pos_frac": 0.828125, "sample": [-108.29924011230469, 125.14041900634766, 232.93008422851562, -36.228416442871094, 78.68373107910156, 257.8806457519531, 66.65397644042969, 16.810022354125977, 48.53388977050781, 187.55239868164062, -104.99689483642578, 240.30357360839844, 383.672607421875, -14.87735366821289, 73.8499755859375, 231.80133056640625, 195.8233184814453, 194.6906280517578, 96.96807861328125, 183.87245178222656, 45.08134078979492, 102.67771911621094, 281.7281799316406, 61.79862976074219, 101.08087158203125, 383.9676513671875, 246.66001892089844, 47.22934341430664, 32.62745666503906, 240.18756103515625, -159.55752563476562, 436.4698181152344, 24.126476287841797, 143.608642578125, 482.56781005859375, 166.85281372070312, 232.53689575195312, -12.918989181518555, -14.687355041503906, -17.496986389160156, 77.30279541015625, 141.77175903320312, 312.72271728515625, 187.5321807861328, 101.64847564697266, 292.9281005859375, 404.9307861328125, -21.16541290283203, 324.32568359375, 179.43130493164062, 224.00473022460938, -90.97943878173828, 283.0151672363281, 202.67591857910156, -23.303604125976562, 70.3305435180664, 82.26068115234375, 363.34893798828125, 424.8816833496094, 195.02113342285156, 91.0755615234375, 322.3190002441406, 210.1172332763672, 159.0248260498047], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000310.npy"}
|
|
{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 176.9522247314453, "std": 209.6773681640625, "min": -185.90342712402344, "p10": -86.07362670898438, "median": 159.8082733154297, "p90": 463.66656494140636, "max": 714.0162353515625, "pos_frac": 0.796875, "sample": [348.8841247558594, 325.2520446777344, 355.5225830078125, 6.084938049316406, -161.53077697753906, 385.72723388671875, -82.95477294921875, 298.4923095703125, 221.48013305664062, 517.5159912109375, 246.15817260742188, 68.00753784179688, 327.29937744140625, 647.6973266601562, 229.43084716796875, 120.18628692626953, 413.23699951171875, 110.24774932861328, 42.277671813964844, 436.8861389160156, 206.49864196777344, 236.06732177734375, 475.1438903808594, 212.15003967285156, 50.499046325683594, 112.71989440917969, 11.177200317382812, -147.86624145507812, 132.92453002929688, 124.28264617919922, -119.96746826171875, 249.59063720703125, -103.35356903076172, 201.67430114746094, 284.24542236328125, -87.4102783203125, -0.863616943359375, 714.0162353515625, -28.506927490234375, 247.63543701171875, 53.195228576660156, 119.7255630493164, 242.969970703125, 38.635589599609375, 536.994140625, 235.40841674804688, 8.74405288696289, -23.681608200073242, 50.36966323852539, 105.15170288085938, -10.177486419677734, 556.817626953125, 106.3843994140625, 9.680442810058594, -185.90342712402344, 395.95941162109375, 187.11874389648438, 186.6920166015625, 2.5764999389648438, 206.5295867919922, 414.703369140625, -120.56694030761719, -23.415252685546875, 604.4711303710938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000311.npy"}
|
|
{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 165.17715454101562, "std": 197.08203125, "min": -232.94622802734375, "p10": -15.25528011322021, "median": 136.6379623413086, "p90": 415.4993255615235, "max": 734.3399658203125, "pos_frac": 0.84375, "sample": [115.48997497558594, 733.0648193359375, 444.39776611328125, 133.96609497070312, 107.18989562988281, 112.54410552978516, 16.14708709716797, 89.78457641601562, 109.82202911376953, 351.8917236328125, -232.94622802734375, 402.959716796875, 734.3399658203125, 220.36790466308594, 209.1648406982422, 236.47891235351562, -152.00619506835938, 189.478759765625, 354.86932373046875, 179.22119140625, 420.8734436035156, 18.363018035888672, 98.16725158691406, 249.06814575195312, 78.89910888671875, 42.73701477050781, 80.9009017944336, 507.7275695800781, 304.2931823730469, 380.3682861328125, 129.10635375976562, 289.7226867675781, 164.73361206054688, 159.8704071044922, -136.22305297851562, 125.52485656738281, 188.63771057128906, 426.80670166015625, 42.14848327636719, 31.736034393310547, 257.8550109863281, 100.22711944580078, -10.24742317199707, 288.21258544921875, 158.4990234375, 65.11541748046875, -19.653839111328125, 642.0064086914062, 5.3617095947265625, 46.80683135986328, 203.35354614257812, 149.62657165527344, -180.38577270507812, -6.6289215087890625, -8.798606872558594, 184.49644470214844, 139.30982971191406, -17.401504516601562, -221.85574340820312, 304.17791748046875, 228.9577178955078, 12.635513305664062, 47.6330451965332, 242.34695434570312], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000312.npy"}
|
|
{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 160.43019104003906, "std": 206.833251953125, "min": -306.43963623046875, "p10": -72.41275253295893, "median": 137.39537811279297, "p90": 382.0883941650391, "max": 728.5578002929688, "pos_frac": 0.828125, "sample": [96.31732177734375, 278.2213134765625, 68.15274047851562, 280.0408935546875, -154.7672119140625, 12.533981323242188, 389.8716125488281, 116.3360595703125, 346.56585693359375, 35.27524185180664, 39.80165100097656, -152.7827911376953, 256.2418212890625, 135.75559997558594, 18.428659439086914, 152.29736328125, 263.6928405761719, 200.2279510498047, 72.7101821899414, 158.4854736328125, 85.3563003540039, 310.197265625, -306.43963623046875, -90.92027282714844, 192.69842529296875, 485.8232727050781, 264.725341796875, 85.05136108398438, 196.21286010742188, 618.4884033203125, 310.79168701171875, 139.03515625, 11.804445266723633, 96.88188171386719, -211.08224487304688, 85.12765502929688, -29.228540420532227, -300.5089111328125, 363.92755126953125, 223.8906707763672, -14.471260070800781, 196.016357421875, 34.65021896362305, 273.88897705078125, 288.2333068847656, 124.53144073486328, 29.055030822753906, 331.1597595214844, 310.94488525390625, -131.1724090576172, 362.61187744140625, 99.8612060546875, 139.68215942382812, -6.452018737792969, 117.68600463867188, 573.2351684570312, 565.264892578125, 3.5198211669921875, -17.07819366455078, 65.22045135498047, 218.92984008789062, 323.62738037109375, 504.7904052734375, 728.5578002929688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000313.npy"}
|
|
{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 156.090576171875, "std": 219.34625244140625, "min": -325.4586181640625, "p10": -117.60101623535154, "median": 134.30924224853516, "p90": 418.4380035400391, "max": 668.8233642578125, "pos_frac": 0.71875, "sample": [590.857421875, -252.059814453125, 111.6624984741211, 70.06002044677734, 60.73699951171875, 243.75753784179688, 86.1871337890625, 239.99111938476562, 83.09884643554688, -64.22016906738281, 270.1015625, -15.081985473632812, 199.84835815429688, 164.39881896972656, 195.29598999023438, -325.4586181640625, -129.24046325683594, 77.37486267089844, 579.5902709960938, 605.8084106445312, 23.606826782226562, -32.275596618652344, -132.2563934326172, 145.9893035888672, 125.11990356445312, -126.58250427246094, -34.02403259277344, -95.28579711914062, 36.716346740722656, 372.60235595703125, 357.69244384765625, 53.66229248046875, 264.9966735839844, 395.85174560546875, 295.82830810546875, 140.9835968017578, -74.9660873413086, 124.35118865966797, -5.648712158203125, 602.6326293945312, 423.4728088378906, 375.9097595214844, 668.8233642578125, 368.9066162109375, -11.505435943603516, 279.0315856933594, -136.55325317382812, 394.67291259765625, -13.210403442382812, 190.30059814453125, 288.7547607421875, 42.25823974609375, 258.3937072753906, 336.3843994140625, 406.69012451171875, 426.8385009765625, 147.2508087158203, 127.6348876953125, 216.78794860839844, 2.8474655151367188, -96.64421081542969, -51.88618850708008, 254.82199096679688, -141.88768005371094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000314.npy"}
|
|
{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 140.48963928222656, "std": 174.15679931640625, "min": -176.02432250976562, "p10": -50.498387527465816, "median": 108.91732025146484, "p90": 385.0887939453126, "max": 620.974853515625, "pos_frac": 0.796875, "sample": [426.3359375, -16.258779525756836, 195.4217987060547, 27.536163330078125, -50.83976364135742, 97.26982116699219, 55.49732971191406, 151.06224060058594, 195.8823699951172, -13.161270141601562, 264.953857421875, 29.957216262817383, -127.09806823730469, 187.29995727539062, 92.38805389404297, 443.8568115234375, 340.5313720703125, 155.09410095214844, -157.17962646484375, 321.3399963378906, 103.28155517578125, 401.0466613769531, 406.62408447265625, 45.34979248046875, -61.230499267578125, 140.12408447265625, 283.98333740234375, 137.7878875732422, 337.0684509277344, 114.55308532714844, 394.2026672363281, 218.44691467285156, 45.516082763671875, 118.33662414550781, 211.1707305908203, 273.42559814453125, -49.70184326171875, -24.9854736328125, 250.5475311279297, 13.442855834960938, 42.164737701416016, 1.595855712890625, 620.974853515625, 37.91871643066406, 210.89276123046875, 12.598838806152344, -155.87664794921875, 27.157258987426758, -89.76634216308594, 363.8230895996094, 97.9228515625, 205.9272918701172, 579.5167846679688, 100.72888946533203, 148.2607879638672, -14.580110549926758, 286.3697509765625, 55.214359283447266, 305.0502014160156, 286.6475830078125, -176.02432250976562, -40.84764099121094, 57.95431900024414, 48.83318328857422], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000315.npy"}
|
|
{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 187.62777709960938, "std": 193.6260528564453, "min": -168.1973114013672, "p10": 3.7253261566162115, "median": 153.54529571533203, "p90": 503.84864501953126, "max": 719.215576171875, "pos_frac": 0.90625, "sample": [305.99432373046875, 94.43418884277344, 440.52734375, 76.3232421875, 93.14507293701172, 652.091796875, 159.93380737304688, 523.1474609375, 384.305419921875, 506.66473388671875, 304.31585693359375, 272.9517822265625, 17.914413452148438, -12.713375091552734, 33.79735565185547, 175.52720642089844, 104.10270690917969, 134.98194885253906, 3.5716514587402344, 289.1328125, 69.76399993896484, 62.991886138916016, 526.3596801757812, 337.64703369140625, 230.77044677734375, 90.64441680908203, 38.95431900024414, 497.27777099609375, 139.1060028076172, 400.294921875, 34.747283935546875, 94.86272430419922, 33.83457946777344, 177.68934631347656, 219.23721313476562, 299.25836181640625, 316.22845458984375, 28.832054138183594, 174.171875, 11.489555358886719, 519.0084228515625, 266.96917724609375, 27.262603759765625, 326.1988830566406, 159.60623168945312, 132.57186889648438, -168.1973114013672, 25.460491180419922, 719.215576171875, 147.48435974121094, 544.489990234375, -159.23434448242188, 171.03175354003906, 181.3148956298828, 306.0528564453125, 4.083900451660156, -67.9574203491211, -43.33653259277344, 9.970527648925781, 116.6485824584961, 242.73971557617188, -124.13793182373047, 78.56002807617188, 248.05929565429688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000316.npy"}
|
|
{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 212.607666015625, "std": 182.19229125976562, "min": -118.09593200683594, "p10": -20.12453060150146, "median": 208.60215759277344, "p90": 443.76849060058595, "max": 572.0674438476562, "pos_frac": 0.859375, "sample": [312.11456298828125, 57.32105255126953, 28.73566436767578, 380.1905822753906, 444.2498474121094, 59.368804931640625, 334.76568603515625, 339.1259765625, -118.09593200683594, 424.03466796875, 560.2219848632812, 127.33705139160156, 305.62591552734375, 49.80279541015625, 137.79397583007812, 330.0587463378906, 164.71299743652344, 12.532295227050781, 110.47769927978516, 321.9447021484375, 29.80169677734375, 262.144775390625, 305.34124755859375, 442.64532470703125, 47.11497116088867, 442.0414123535156, -66.08416748046875, 116.23981475830078, 572.0674438476562, 284.92535400390625, 503.43463134765625, 371.9656982421875, 201.96424865722656, 438.1898498535156, 338.530517578125, -0.9003753662109375, 169.05430603027344, 145.5482635498047, 269.759765625, 117.58547973632812, 109.48202514648438, 451.1182556152344, -15.311594009399414, -30.233720779418945, 505.63201904296875, 255.33578491210938, 432.6772766113281, 233.67111206054688, 236.34536743164062, -86.4195785522461, 99.52827453613281, 127.06024932861328, 215.2400665283203, 17.22113800048828, -22.187217712402344, 145.95907592773438, 93.71055603027344, -76.00862884521484, 551.215576171875, 289.4862060546875, 65.58036041259766, 388.1686096191406, -63.75810241699219, 309.6878967285156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000317.npy"}
|
|
{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 167.8129425048828, "std": 179.82887268066406, "min": -246.6536407470703, "p10": -32.4613712310791, "median": 136.4801025390625, "p90": 380.7280517578125, "max": 654.9163208007812, "pos_frac": 0.8125, "sample": [61.66089630126953, -17.235183715820312, 654.9163208007812, 134.0635986328125, 211.27212524414062, 92.17924499511719, -7.887018203735352, 145.38671875, 134.38836669921875, 570.145263671875, 27.21348762512207, 108.48193359375, 256.155517578125, 312.51812744140625, 499.71173095703125, 281.97808837890625, 25.435073852539062, 89.40434265136719, -39.480674743652344, -16.46354103088379, 382.58587646484375, 49.250144958496094, -102.30339050292969, 435.49578857421875, 593.8236694335938, 265.57550048828125, -45.98672103881836, -34.88755416870117, 272.8246765136719, 287.0027770996094, 54.530059814453125, 333.8118591308594, 56.978492736816406, 341.8836364746094, 190.47422790527344, 136.52481079101562, 425.6423034667969, -9.98089599609375, -50.397125244140625, 376.1023864746094, 376.39312744140625, 233.34164428710938, 16.937519073486328, 80.31014251708984, 269.37371826171875, 194.83706665039062, 136.43539428710938, 219.61831665039062, 106.56645202636719, -26.800277709960938, -246.6536407470703, 88.36058807373047, 227.5335693359375, 173.0670166015625, 313.2873840332031, 334.44879150390625, 204.28280639648438, 126.0003662109375, 43.84381103515625, 200.24856567382812, 108.68994903564453, 225.94558715820312, -160.63189697265625, 11.797142028808594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000318.npy"}
|
|
{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 178.05307006835938, "std": 195.85464477539062, "min": -238.6461181640625, "p10": -92.83422851562499, "median": 180.94383239746094, "p90": 414.3821990966797, "max": 701.7128295898438, "pos_frac": 0.8125, "sample": [237.933349609375, 128.2335662841797, 257.7636413574219, 203.26583862304688, 140.73509216308594, 242.90802001953125, 289.0832824707031, 370.26409912109375, 232.87893676757812, 75.38021087646484, -62.556846618652344, -222.19003295898438, 285.4772644042969, 128.7104034423828, 187.35824584960938, 159.34754943847656, 57.55982208251953, 202.8462371826172, 270.4900207519531, 129.24041748046875, 129.78053283691406, 347.483154296875, 162.92752075195312, 298.9527587890625, 232.6511993408203, 4.3018341064453125, 267.3109436035156, 660.931640625, 297.2567443847656, 302.79248046875, -110.63751220703125, 118.01211547851562, 114.56266784667969, 382.62176513671875, -220.505859375, 212.47183227539062, -238.6461181640625, -97.79299926757812, -81.26376342773438, 292.758544921875, 495.3324890136719, 414.4281311035156, 491.6150207519531, -19.874448776245117, 144.61561584472656, 6.52398681640625, 470.10760498046875, 452.0682067871094, 252.24891662597656, 414.2750244140625, 701.7128295898438, -105.96974182128906, 121.88401794433594, 317.86419677734375, -38.97563552856445, 30.742935180664062, -133.27615356445312, -39.829933166503906, 217.47470092773438, 140.82154846191406, 183.70037841796875, 178.18728637695312, 175.19573974609375, 133.82533264160156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000319.npy"}
|
|
{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 207.61529541015625, "std": 205.1048126220703, "min": -198.09597778320312, "p10": -19.59660186767578, "median": 188.40513610839844, "p90": 466.46783142089845, "max": 889.69921875, "pos_frac": 0.84375, "sample": [150.78228759765625, 195.0989227294922, 710.1944580078125, 112.47154235839844, 286.17877197265625, -13.381523132324219, 191.54885864257812, -198.09597778320312, 682.1680297851562, 145.73207092285156, 383.9991760253906, 215.22714233398438, -36.44538879394531, 316.10711669921875, 174.76348876953125, 473.73870849609375, 460.697509765625, 229.20632934570312, 282.21044921875, -64.20655059814453, 286.519775390625, 282.95263671875, 35.508766174316406, 185.69708251953125, 60.342933654785156, 89.7455062866211, 328.66552734375, 276.6009826660156, -19.0263671875, 468.9408264160156, -27.515701293945312, 238.07217407226562, 24.928184509277344, 190.81396484375, 365.0025634765625, 152.27346801757812, 97.3892822265625, 147.6322021484375, 889.69921875, 171.34671020507812, 391.1335144042969, 76.12427520751953, 103.18511962890625, -19.840988159179688, 533.0955200195312, 41.60973358154297, 270.4084167480469, 163.34498596191406, -5.878461837768555, 19.00701141357422, 274.3804016113281, 321.4447021484375, 234.85572814941406, 351.161865234375, 390.769775390625, 514.51708984375, 438.4830322265625, 2.776792526245117, -72.92288208007812, 17.286048889160156, 31.198974609375, 191.3589630126953, 185.99630737304688, -109.703369140625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000320.npy"}
|
|
{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 174.6695556640625, "std": 215.9467010498047, "min": -320.59332275390625, "p10": -55.60642890930174, "median": 162.74159240722656, "p90": 457.4555328369142, "max": 719.493896484375, "pos_frac": 0.796875, "sample": [583.3719482421875, 111.60108184814453, 225.93614196777344, 249.44345092773438, -37.38195037841797, 303.34210205078125, 282.6605529785156, 164.61053466796875, 139.758544921875, 425.04296875, 397.4420471191406, 291.781494140625, 32.492156982421875, -1.2019081115722656, -146.3685760498047, 234.79237365722656, 432.6285400390625, 262.99969482421875, 203.49649047851562, 396.95831298828125, 621.31494140625, 83.13452911376953, -320.59332275390625, 224.68841552734375, 166.23898315429688, 291.2326965332031, 497.90655517578125, -277.5605773925781, -80.79341125488281, 131.5064239501953, 99.87110900878906, 468.0956726074219, -22.99664306640625, 298.2012939453125, 160.87265014648438, 397.0527648925781, 153.7997283935547, -153.349853515625, 719.493896484375, 123.41596221923828, -35.148101806640625, 16.37847328186035, -17.523061752319336, 116.85708618164062, 17.716392517089844, 159.45236206054688, 47.057373046875, -63.41691970825195, 142.47952270507812, 218.1326141357422, 611.7348022460938, 525.7247314453125, 22.934772491455078, -295.8958740234375, 169.5682373046875, 174.81875610351562, 169.21234130859375, 198.28262329101562, 40.55889892578125, 376.38580322265625, 143.7371826171875, 220.23995971679688, 103.6702880859375, -19.044708251953125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000321.npy"}
|
|
{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 187.06900024414062, "std": 179.90158081054688, "min": -177.0634765625, "p10": -30.643476867675776, "median": 193.97863006591797, "p90": 434.28635253906253, "max": 615.75439453125, "pos_frac": 0.8125, "sample": [-63.500648498535156, 513.9259033203125, 222.05340576171875, 615.75439453125, 77.4626693725586, -69.32009887695312, 431.94342041015625, 314.656005859375, 255.0904541015625, -70.17559814453125, -26.818374633789062, 1.8919754028320312, 284.984619140625, 249.55364990234375, 262.1716613769531, 337.46356201171875, 23.85480499267578, 190.65713500976562, -10.078041076660156, -32.282806396484375, 355.3509826660156, 466.60931396484375, 217.947021484375, 503.3016357421875, -9.5526123046875, 185.60470581054688, 338.00244140625, 435.29046630859375, -88.4666976928711, 443.8465881347656, -12.63044548034668, 173.7423095703125, 65.23544311523438, 344.77459716796875, 10.0836181640625, 26.050411224365234, 197.3001251220703, 158.88272094726562, 13.200443267822266, 234.95721435546875, 323.784423828125, 431.9398193359375, 328.30743408203125, -77.67498016357422, 337.99951171875, 151.9748992919922, 168.93772888183594, -21.669414520263672, 215.3732452392578, 121.01918029785156, 80.84244537353516, 510.48846435546875, 198.36251831054688, 273.3720703125, 305.56768798828125, 277.25604248046875, 72.4541015625, 105.90198516845703, 29.253084182739258, -177.0634765625, 20.507720947265625, 188.7549285888672, 331.9858093261719, 205.92294311523438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000322.npy"}
|
|
{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 164.98715209960938, "std": 195.4263458251953, "min": -355.83184814453125, "p10": -61.088630676269524, "median": 179.25020599365234, "p90": 430.280029296875, "max": 667.3925170898438, "pos_frac": 0.828125, "sample": [342.2906188964844, 247.531494140625, 419.96142578125, -355.83184814453125, 484.4765930175781, -199.26071166992188, 475.03118896484375, 667.3925170898438, 54.31583023071289, 98.48204803466797, 278.3350830078125, 30.174774169921875, 134.01681518554688, 133.5076141357422, 7.925559997558594, 228.7002410888672, 84.34772491455078, 246.3090057373047, 8.775749206542969, 561.6674194335938, -32.51917266845703, 70.24076843261719, 431.6331787109375, -63.554283142089844, -72.21177673339844, 299.0843200683594, 57.992897033691406, 172.62266540527344, 316.21563720703125, 73.5352783203125, 373.9385070800781, 185.87774658203125, 258.89990234375, -63.60862731933594, 246.94139099121094, 14.741409301757812, 73.94358825683594, 113.24736785888672, 284.28155517578125, 231.50494384765625, 282.899169921875, 48.841033935546875, 210.02935791015625, 427.1226806640625, -230.2979736328125, 190.93861389160156, 314.5616760253906, 447.6591796875, 267.84344482421875, -36.399314880371094, 210.09130859375, 440.9676513671875, -55.33544158935547, 230.43125915527344, -220.2426300048828, 207.17730712890625, 123.04925537109375, 192.38168334960938, 58.907554626464844, 257.16986083984375, 86.10247039794922, -4.418048858642578, 139.01364135742188, 49.727142333984375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000323.npy"}
|
|
{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 175.08053588867188, "std": 218.1950225830078, "min": -285.26055908203125, "p10": -97.45761413574218, "median": 167.2236785888672, "p90": 471.13924255371103, "max": 806.6260986328125, "pos_frac": 0.8125, "sample": [-104.8865966796875, 488.68035888671875, 9.34234619140625, 169.552490234375, 71.87230682373047, 171.9686279296875, 285.7064514160156, 255.5892333984375, 183.08030700683594, 207.66094970703125, 614.4475708007812, 18.845550537109375, 230.95550537109375, 92.05715942382812, -18.780323028564453, 565.926513671875, -183.1467742919922, -285.26055908203125, 271.7880859375, 231.600341796875, 26.392379760742188, 164.89486694335938, -128.0732421875, 251.0894775390625, 252.3383026123047, 661.8974609375, 377.91583251953125, -130.50828552246094, 120.43994140625, 326.6051330566406, 445.9883117675781, 123.04780578613281, 137.49407958984375, 136.4613037109375, 306.2090148925781, 311.0071716308594, 136.67076110839844, 292.4248352050781, 147.8853759765625, 3.952627182006836, 286.03363037109375, -284.2236328125, 170.2947998046875, 367.2615966796875, 481.918212890625, 308.08099365234375, 84.98869323730469, -253.4016876220703, -80.12332153320312, 157.83187866210938, 193.12506103515625, 121.0619125366211, 58.33772277832031, -17.29909896850586, 178.28004455566406, 148.89625549316406, 377.23455810546875, -79.07664489746094, 20.31069564819336, 806.6260986328125, 486.09246826171875, 160.43251037597656, -16.456390380859375, 287.79510498046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000324.npy"}
|
|
{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 142.56036376953125, "std": 164.69024658203125, "min": -286.47796630859375, "p10": -18.243771362304688, "median": 139.3031234741211, "p90": 345.8003387451172, "max": 548.60302734375, "pos_frac": 0.8125, "sample": [-286.47796630859375, -190.41766357421875, 231.53976440429688, -64.59374237060547, 276.2025146484375, 235.89175415039062, 116.6241226196289, -63.62898254394531, 228.8355712890625, 304.570068359375, 235.7311553955078, 400.65570068359375, 242.912109375, 146.9519805908203, 192.5016326904297, 95.82929229736328, 65.2378921508789, 74.92335510253906, 26.608474731445312, 245.80662536621094, -18.36278533935547, 290.09796142578125, 249.98281860351562, 424.74310302734375, 18.841800689697266, -17.96607208251953, 134.7503204345703, 101.0499496459961, 101.95030212402344, 32.88129425048828, 346.2228088378906, 61.43074035644531, 547.2540283203125, -3.4620628356933594, 448.917724609375, -5.915504455566406, 379.44842529296875, 5.236461639404297, 229.77816772460938, 196.36245727539062, -37.468406677246094, 122.2342758178711, 148.42214965820312, -3.545747756958008, 211.130126953125, 260.1984558105469, 97.7345199584961, 227.99862670898438, 55.713470458984375, 199.54962158203125, 234.34872436523438, -241.75225830078125, 18.34619140625, 548.60302734375, 30.4864501953125, 143.85592651367188, 344.8145751953125, 91.82738494873047, -15.776473999023438, 183.52685546875, 162.4029998779297, 60.1330451965332, 44.109527587890625, 198.02499389648438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000325.npy"}
|
|
{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 135.87144470214844, "std": 145.0045623779297, "min": -231.5555419921875, "p10": -19.597317123413085, "median": 121.59685516357422, "p90": 344.7104675292971, "max": 539.1754150390625, "pos_frac": 0.8125, "sample": [-3.66729736328125, 32.05130386352539, 60.43782043457031, 379.076904296875, 129.4201202392578, -2.894134521484375, 61.02475357055664, 113.77359008789062, 232.30685424804688, -165.7412109375, -20.718692779541016, -231.5555419921875, 416.61962890625, 68.53191375732422, -59.93895721435547, 97.27909851074219, 131.85812377929688, 134.75152587890625, 189.89083862304688, 279.5145568847656, 37.010528564453125, 34.55902099609375, 233.44436645507812, 255.96609497070312, -7.77105712890625, 113.58573913574219, 539.1754150390625, 155.35169982910156, 68.02153015136719, 66.86585998535156, 287.5596008300781, 171.83627319335938, 283.3368225097656, 168.17755126953125, 178.10723876953125, -16.98077392578125, 74.46340942382812, 280.1035461425781, -36.032901763916016, 156.85601806640625, 95.64253234863281, 19.94647216796875, 139.62588500976562, 109.5345458984375, 53.262908935546875, 85.03120422363281, 166.71035766601562, 172.07513427734375, 155.69786071777344, 253.29063415527344, -59.88655090332031, 370.4867248535156, 417.5675354003906, 111.9919662475586, 293.044921875, 170.12435913085938, 377.47235107421875, 198.99295043945312, -1.8407001495361328, -56.74943542480469, 366.85284423828125, 283.7137451171875, 5.262042999267578, 82.26473236083984], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000326.npy"}
|
|
{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 198.48397827148438, "std": 181.97647094726562, "min": -201.25625610351562, "p10": -46.09343490600584, "median": 200.32440185546875, "p90": 426.1481079101563, "max": 651.75927734375, "pos_frac": 0.859375, "sample": [163.77734375, 126.78104400634766, 391.0753173828125, 184.54022216796875, 157.00416564941406, 340.7115173339844, 15.853981018066406, 223.11383056640625, 267.3961181640625, 140.67987060546875, 292.3739318847656, 328.0206298828125, 83.55647277832031, 326.10626220703125, -55.91553497314453, 258.3934020996094, 32.239566802978516, 395.7842712402344, 225.52896118164062, 268.1026611328125, -201.25625610351562, 531.6641845703125, 455.36895751953125, 380.3995666503906, 1.2467632293701172, 466.562744140625, 280.4881591796875, -97.08399963378906, 203.11801147460938, 289.59576416015625, 316.0389709472656, -130.484619140625, -2.569580078125, 162.34942626953125, 255.7363739013672, 186.3564910888672, 244.4989776611328, 180.56175231933594, 240.27529907226562, 197.53079223632812, 64.98670959472656, 40.08203125, -23.175199508666992, 427.9750061035156, 173.5626220703125, -72.42391967773438, 69.54148864746094, 294.20220947265625, 336.05255126953125, 123.32473754882812, 455.587646484375, 68.37071228027344, 651.75927734375, -151.4618377685547, 421.8853454589844, 87.0189208984375, 98.11845397949219, 448.5832824707031, 55.45048522949219, 377.7725830078125, 295.2052001953125, 399.8563232421875, 104.7880630493164, -169.60992431640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000327.npy"}
|
|
{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 169.84080505371094, "std": 197.94525146484375, "min": -313.9453430175781, "p10": -33.810686874389646, "median": 116.52522659301758, "p90": 428.1798065185547, "max": 683.24560546875, "pos_frac": 0.796875, "sample": [180.97914123535156, -116.97720336914062, 105.41080474853516, 108.56027221679688, 50.96441650390625, 266.315185546875, 266.02056884765625, 89.33232116699219, 71.54069519042969, -313.9453430175781, 50.03656768798828, 350.7940673828125, 429.25213623046875, 320.74627685546875, 200.17811584472656, 342.94927978515625, -3.6589584350585938, -130.6271209716797, 15.267606735229492, 108.66693115234375, -42.69625473022461, -17.638336181640625, 331.4840393066406, 661.6646728515625, 213.88006591796875, 285.6948547363281, 317.1282653808594, 125.18380737304688, 513.3062133789062, 353.656494140625, -34.28759765625, 154.78463745117188, 259.40618896484375, -32.69789505004883, 26.073598861694336, 94.55487060546875, 254.81808471679688, 305.7663879394531, 169.1556396484375, 98.51498413085938, -4.943971633911133, 95.10327911376953, -85.15048217773438, 208.14114379882812, -17.83155059814453, 396.7185363769531, 123.71195983886719, 237.8946075439453, 237.64364624023438, 109.33849334716797, 425.6777038574219, 469.5276184082031, 66.08872985839844, 19.4810791015625, 0.07376480102539062, 616.213134765625, 683.24560546875, 51.57698059082031, 480.78424072265625, -1.5281791687011719, 56.90979766845703, 284.4698181152344, 89.73664855957031, -102.63005828857422], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000328.npy"}
|
|
{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 198.17344665527344, "std": 212.9168701171875, "min": -386.0366516113281, "p10": -46.37753200531004, "median": 190.55953216552734, "p90": 464.1711090087892, "max": 812.4570922851562, "pos_frac": 0.828125, "sample": [138.58499145507812, 342.22467041015625, 398.06695556640625, 264.35687255859375, 212.40206909179688, -63.693199157714844, 426.8044738769531, 50.257484436035156, 432.49072265625, 201.5244140625, 86.02386474609375, 330.7364196777344, 64.55374908447266, -4.944000244140625, 121.53993225097656, 203.72293090820312, 181.9099884033203, 203.57437133789062, 39.667938232421875, 596.79931640625, 124.01708221435547, 93.93113708496094, 225.19236755371094, 167.84197998046875, -386.0366516113281, 151.11917114257812, 57.903709411621094, 812.4570922851562, 506.5825500488281, -16.632766723632812, 266.5645751953125, -17.17269515991211, 316.45989990234375, 48.997642517089844, 238.7062225341797, 362.0815734863281, 477.6507568359375, 292.7923583984375, 596.2859497070312, 191.95816040039062, 80.56626892089844, -55.06389236450195, -121.82853698730469, 401.1982421875, 432.7185974121094, -114.1947021484375, 331.65911865234375, -69.55152893066406, 296.885498046875, 137.5725555419922, 203.85586547851562, -26.109357833862305, 234.8588104248047, 234.00051879882812, 56.25965118408203, 50.05248260498047, 189.16090393066406, 371.9417724609375, 627.7822265625, 75.051025390625, -201.08062744140625, 170.02352905273438, 546.0341186523438, 94.0048599243164], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000329.npy"}
|
|
{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 188.0545654296875, "std": 196.62806701660156, "min": -179.89501953125, "p10": -54.50350608825683, "median": 189.5232391357422, "p90": 419.8767639160157, "max": 687.195068359375, "pos_frac": 0.828125, "sample": [402.27947998046875, -11.85675048828125, -146.26123046875, -49.529232025146484, 232.23516845703125, 398.79144287109375, 131.55142211914062, 143.92626953125, -56.635337829589844, 64.67009735107422, -96.80076599121094, 124.00238037109375, 150.19424438476562, 400.9180603027344, 427.41845703125, 64.46375274658203, 191.17449951171875, 250.42001342773438, 207.91656494140625, 390.52557373046875, 55.933998107910156, 85.54098510742188, 270.3124084472656, 69.74632263183594, 122.92633056640625, 279.39923095703125, -147.01947021484375, 263.3431396484375, 214.86985778808594, 216.94552612304688, 293.3056945800781, 21.599071502685547, -179.89501953125, 187.87197875976562, 429.017822265625, 165.41653442382812, 73.79008483886719, 625.379150390625, 55.91089630126953, 127.47505187988281, -49.101234436035156, 394.99298095703125, 116.12210083007812, 687.195068359375, -110.42475891113281, 666.7686767578125, 482.24774169921875, 280.2069396972656, 267.7337341308594, 71.92424774169922, 257.9449462890625, 306.48138427734375, 119.05902099609375, 514.1666870117188, 3.3588180541992188, 285.59368896484375, 211.63388061523438, 311.30206298828125, -164.9329833984375, 395.79107666015625, 101.2113037109375, 212.00660705566406, 255.312255859375, -32.376312255859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000330.npy"}
|
|
{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 195.0502166748047, "std": 147.8446502685547, "min": -92.0755844116211, "p10": 60.57013168334961, "median": 165.82679748535156, "p90": 362.21746826171875, "max": 628.865478515625, "pos_frac": 0.9375, "sample": [131.80545043945312, 628.865478515625, 282.4065246582031, -11.542083740234375, 288.9332275390625, 218.25210571289062, 77.40496826171875, 10.305618286132812, 191.5713348388672, 112.14794921875, 527.19580078125, 289.0337829589844, 611.9188232421875, 85.86005401611328, 160.4249725341797, 193.65219116210938, 552.928955078125, 119.48509216308594, 163.61422729492188, 116.37619018554688, 365.17645263671875, 355.31317138671875, 390.2412414550781, 83.10606384277344, 106.3319091796875, 125.81303405761719, 153.9217071533203, 190.31900024414062, 256.36419677734375, 19.20806884765625, 182.24822998046875, -46.32765579223633, 248.9007568359375, 90.36436462402344, 130.01651000976562, 112.91573333740234, -92.0755844116211, 143.69281005859375, 254.02239990234375, 522.9534301757812, 279.17578125, 138.9048309326172, 96.21758270263672, 283.32318115234375, 59.964942932128906, 121.21359252929688, 249.05044555664062, 168.03936767578125, 243.86541748046875, 100.86064147949219, -58.52593994140625, 215.9197235107422, 90.14741516113281, 144.98023986816406, 265.5563049316406, 185.11610412597656, 289.650146484375, 94.19388580322266, 98.93553161621094, 277.78094482421875, 216.77333068847656, 244.5380859375, 272.403564453125, 61.98223876953125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000331.npy"}
|
|
{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 140.09872436523438, "std": 169.36814880371094, "min": -164.37539672851562, "p10": -45.21898803710937, "median": 95.98927307128906, "p90": 379.65570068359375, "max": 606.5932006835938, "pos_frac": 0.796875, "sample": [506.71710205078125, 154.5712890625, 110.50086975097656, 237.21340942382812, 34.57861328125, 13.052352905273438, 84.25819396972656, 354.258544921875, 380.96527099609375, 178.6928253173828, 22.010879516601562, 59.83049011230469, -38.95184326171875, 606.5932006835938, 175.27162170410156, 82.73143768310547, 90.17666625976562, 74.90049743652344, 51.21900177001953, -0.023515701293945312, 192.45306396484375, 84.97572326660156, 351.9591369628906, 2.3299102783203125, 333.12286376953125, 55.9650764465332, -98.0242919921875, 47.514060974121094, 461.8062744140625, 201.457763671875, 133.56211853027344, 45.30554962158203, 181.1627655029297, -22.95275115966797, 4.286888122558594, 69.1478042602539, 415.9111328125, 411.9736633300781, -56.7001953125, 238.98143005371094, 135.88363647460938, -85.03174591064453, 265.97174072265625, 84.34996032714844, 252.55307006835938, 538.0463256835938, 92.31062316894531, 376.60003662109375, -31.80364990234375, 99.66792297363281, -115.7271728515625, 165.906005859375, 280.3537902832031, -47.9049072265625, 298.09393310546875, 84.76921081542969, -29.587448120117188, -15.947822570800781, -89.351806640625, 109.00151824951172, 285.3026123046875, 130.02484130859375, 114.40821838378906, -164.37539672851562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000332.npy"}
|
|
{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 188.1348419189453, "std": 233.10333251953125, "min": -216.8622283935547, "p10": -89.02511215209961, "median": 164.1890869140625, "p90": 480.80830993652353, "max": 718.875, "pos_frac": 0.734375, "sample": [-46.96302795410156, 218.2109375, 718.875, 8.304763793945312, 88.65836334228516, 27.812973022460938, 255.0806121826172, 244.00233459472656, 54.17987060546875, 28.147151947021484, 655.677490234375, -90.96283721923828, 146.5106201171875, -180.04049682617188, 442.80438232421875, -84.50375366210938, 586.6640625, -33.456512451171875, 531.046630859375, 304.3710632324219, -32.993350982666016, 287.01348876953125, -59.96260070800781, 333.0285949707031, 369.0784912109375, 428.161376953125, -91.18071746826172, 83.8394775390625, 93.5621337890625, 370.1187744140625, 54.22563934326172, 10.32032585144043, -41.72021484375, 461.4431457519531, 429.3409423828125, 358.4012756347656, -198.793212890625, 6.349693298339844, 489.107666015625, 321.2917175292969, 400.7631530761719, -63.92835998535156, -4.1680755615234375, 390.4478454589844, 446.97052001953125, -12.296211242675781, 138.30995178222656, 181.8675537109375, 506.90679931640625, 244.85223388671875, 460.12152099609375, 307.58795166015625, 547.7054443359375, 96.56502532958984, 208.104248046875, -138.7242889404297, 365.4002685546875, -14.339839935302734, 258.32611083984375, -216.8622283935547, 420.84783935546875, -178.02639770507812, 51.56904602050781, 97.57722473144531], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000333.npy"}
|
|
{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 170.35968017578125, "std": 228.11529541015625, "min": -336.39404296875, "p10": -82.98533172607421, "median": 153.73522186279297, "p90": 486.5512176513672, "max": 646.758544921875, "pos_frac": 0.796875, "sample": [458.1834716796875, 99.65582275390625, -146.731689453125, 446.8623962402344, 242.60025024414062, 31.58885955810547, -34.18059539794922, 67.31192779541016, 207.9322052001953, 23.6549072265625, 200.41073608398438, 76.4474868774414, 307.1155090332031, 134.77584838867188, -78.10670471191406, 172.69459533691406, -4.55244255065918, 384.6951904296875, -336.39404296875, 488.5255432128906, 46.540470123291016, 252.78887939453125, -18.708267211914062, -264.98809814453125, 314.1556091308594, 202.652587890625, 414.0504455566406, -239.93898010253906, -122.70164489746094, -0.8803253173828125, 21.655120849609375, -85.076171875, 82.8565673828125, 71.56707763671875, 534.2041625976562, 646.758544921875, 292.3795471191406, 7.497995376586914, 54.24277877807617, 338.4671630859375, 98.79131317138672, 218.28338623046875, 481.9444580078125, 285.31573486328125, 27.489730834960938, 88.32888793945312, 581.4328002929688, 224.40316772460938, 612.4854736328125, 339.49957275390625, -35.73023223876953, -293.42303466796875, 41.16156005859375, 261.18414306640625, 189.6033477783203, 564.0111083984375, 394.76141357421875, 119.7625732421875, 65.61741638183594, 342.38726806640625, 37.43882751464844, 606.488037109375, 185.61993408203125, 176.150390625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000334.npy"}
|
|
{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 140.5943603515625, "std": 214.9195556640625, "min": -266.5575256347656, "p10": -121.047802734375, "median": 131.0144500732422, "p90": 385.31907653808594, "max": 822.8142700195312, "pos_frac": 0.734375, "sample": [147.99334716796875, 330.072509765625, 381.8973388671875, 39.34112548828125, 156.35693359375, -86.31389617919922, 78.82611846923828, 365.977783203125, 95.35342407226562, 30.2403507232666, -164.17813110351562, 71.66697692871094, 234.15805053710938, 244.66102600097656, 43.90790557861328, 461.69775390625, 76.2402572631836, 140.994384765625, -223.67787170410156, 191.62026977539062, -105.6649169921875, 199.3063201904297, 181.4097137451172, 121.03451538085938, 503.4382629394531, 476.98583984375, 384.5582580566406, 228.19203186035156, 66.74862670898438, 572.7085571289062, 822.8142700195312, -251.30862426757812, 215.97836303710938, -266.5575256347656, 361.36090087890625, 11.548360824584961, 391.84375, 53.57325744628906, 66.510009765625, 316.7183837890625, -117.0438232421875, 323.0833435058594, 39.913978576660156, 17.0960693359375, 270.585205078125, -13.771860122680664, 148.37049865722656, -2.110687255859375, -195.6158447265625, -47.41180419921875, 103.26506042480469, -54.25025939941406, 345.5079345703125, -122.7637939453125, -92.82274627685547, 346.801513671875, 188.19273376464844, 385.6451416015625, -1.2834014892578125, -162.79959106445312, 205.35879516601562, 264.5580749511719, 257.48455810546875, -55.98379135131836], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000335.npy"}
|
|
{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 210.02166748046875, "std": 244.06259155273438, "min": -406.17474365234375, "p10": -70.37049026489258, "median": 183.87696838378906, "p90": 501.6633239746094, "max": 832.7176513671875, "pos_frac": 0.84375, "sample": [249.97845458984375, 46.14366912841797, 669.5733642578125, 28.719812393188477, 424.7752380371094, 232.360107421875, 221.73330688476562, -67.7455062866211, -71.4954833984375, 706.662841796875, 173.22169494628906, 346.93121337890625, 127.19200897216797, 372.25445556640625, 832.7176513671875, -38.4769287109375, 585.734130859375, 365.8520812988281, 210.5873565673828, 325.7664794921875, -97.09437561035156, 72.64456176757812, 145.8572235107422, 275.3760986328125, 64.703125, 241.2513427734375, 105.81568908691406, 100.88531494140625, 489.5330810546875, 247.61163330078125, 168.30491638183594, 73.96880340576172, 105.83146667480469, 165.56997680664062, 128.15492248535156, 142.83395385742188, -164.30259704589844, 506.86199951171875, -81.03945922851562, -36.577781677246094, 397.76483154296875, 91.74555206298828, 672.8104248046875, 66.54316711425781, 409.71563720703125, 60.76899719238281, -406.17474365234375, 44.370277404785156, 288.5263977050781, 272.22930908203125, 103.77159118652344, 127.89498138427734, 194.53224182128906, 96.16498565673828, 283.1141662597656, 392.3521728515625, 388.52593994140625, 800.9491577148438, -292.8914794921875, 278.22369384765625, -250.05267333984375, 305.0576477050781, 296.0535888671875, 420.7149658203125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000336.npy"}
|
|
{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 204.42605590820312, "std": 206.07077026367188, "min": -254.9093780517578, "p10": -10.134125518798827, "median": 187.66226196289062, "p90": 441.39261779785164, "max": 770.5557250976562, "pos_frac": 0.859375, "sample": [736.1707763671875, 249.98468017578125, -254.9093780517578, 108.05913543701172, 396.25311279296875, 415.8987731933594, 14.966636657714844, 254.11204528808594, 68.94837951660156, 376.96038818359375, 19.18280792236328, 227.32672119140625, 225.52777099609375, 315.65606689453125, 428.3221130371094, -95.16439056396484, 239.7533721923828, 214.77362060546875, -10.738265991210938, 351.83148193359375, 166.2888946533203, 173.7161865234375, 63.73603057861328, 452.75726318359375, 151.58335876464844, 29.66326904296875, 770.5557250976562, 24.14630126953125, -74.68912506103516, -140.0086669921875, 173.88406372070312, 160.49012756347656, 272.12310791015625, -97.22259521484375, 139.7472686767578, 116.48976135253906, 400.5326843261719, 54.72203063964844, 201.44046020507812, 346.65130615234375, -1.9938850402832031, 588.4124145507812, 128.09036254882812, 157.81396484375, -220.0850067138672, 119.13424682617188, 579.2479858398438, 381.4005126953125, 351.7239685058594, 81.08771514892578, 208.64779663085938, 37.32183074951172, 446.9942626953125, 304.3037109375, 122.24180603027344, 494.062255859375, 319.01483154296875, 214.6419677734375, -8.724464416503906, 328.3218994140625, 298.7584533691406, 129.00201416015625, 7.173534393310547, 347.15228271484375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000337.npy"}
|
|
{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 201.4312744140625, "std": 192.4988250732422, "min": -158.05255126953125, "p10": -20.15741424560545, "median": 163.0802230834961, "p90": 448.17750244140626, "max": 678.9964599609375, "pos_frac": 0.875, "sample": [116.41688537597656, 449.0377502441406, -59.35862731933594, -151.7862548828125, 162.26541137695312, 50.76837921142578, -140.91830444335938, 220.6405487060547, 552.3197631835938, 246.49798583984375, 103.91526794433594, 89.26829528808594, -49.224365234375, 446.1702575683594, 103.20172119140625, 307.43121337890625, 38.058074951171875, 328.45751953125, 346.48370361328125, 432.1222229003906, 565.1045532226562, 62.289695739746094, 246.20281982421875, 262.3197021484375, 278.19244384765625, 101.63075256347656, 348.0770263671875, -4.445610046386719, 17.165489196777344, 29.126564025878906, 163.89503479003906, 514.8226318359375, 426.2337646484375, 68.32582092285156, 191.02078247070312, 296.85894775390625, 103.27336883544922, 23.035526275634766, 294.3919982910156, 103.10826873779297, 379.0776672363281, 58.14265441894531, 97.59649658203125, 5.8754425048828125, 423.00152587890625, 43.373779296875, 228.92910766601562, 91.7041244506836, 326.08062744140625, 136.2382049560547, -27.5152587890625, 42.55168533325195, 519.4183959960938, 678.9964599609375, -26.89104461669922, 295.688720703125, 426.97607421875, 214.20401000976562, 325.65386962890625, 153.05450439453125, 505.4176025390625, -158.05255126953125, 69.56332397460938, 400.11956787109375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000338.npy"}
|
|
{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 194.13473510742188, "std": 187.5214385986328, "min": -231.0032196044922, "p10": -7.022666931152338, "median": 185.10492706298828, "p90": 476.38750610351576, "max": 631.6327514648438, "pos_frac": 0.875, "sample": [121.29153442382812, 18.08879852294922, -48.024784088134766, -38.37870788574219, 487.83648681640625, 28.959518432617188, 151.04380798339844, 155.86715698242188, 336.1609191894531, 16.27991485595703, 589.9348754882812, 114.88410949707031, 83.37423706054688, 101.36361694335938, 284.89361572265625, 257.2158203125, 12.45985221862793, 54.322471618652344, 54.8909912109375, 504.3094177246094, 247.0240936279297, -9.273307800292969, 261.57513427734375, 21.099815368652344, 186.3492431640625, 161.63111877441406, 489.4141540527344, 27.725814819335938, 332.7362060546875, 631.6327514648438, -151.63241577148438, 253.89370727539062, -89.88713836669922, 122.25131225585938, 434.4913024902344, 268.3167724609375, 152.7347412109375, 22.16620445251465, 194.5491943359375, 307.62420654296875, 129.6036376953125, 198.91868591308594, 355.16461181640625, 373.9844055175781, 83.52021026611328, 449.6732177734375, -107.7835464477539, 548.8597412109375, 214.65377807617188, -231.0032196044922, 221.85980224609375, 245.8604736328125, 20.377792358398438, 288.5959777832031, 64.21646118164062, 107.62992858886719, 183.86061096191406, -1.7711715698242188, 343.734130859375, 261.03125, 417.9488525390625, 507.5158386230469, 355.493408203125, 241.48175048828125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000339.npy"}
|
|
{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 173.47048950195312, "std": 210.90872192382812, "min": -287.7886657714844, "p10": -42.00229721069336, "median": 151.81165313720703, "p90": 426.75738220214845, "max": 771.309326171875, "pos_frac": 0.78125, "sample": [174.6622314453125, 277.1806335449219, 68.25614166259766, 208.5589599609375, 155.12144470214844, 467.00433349609375, 529.963134765625, -287.7886657714844, 212.48825073242188, -39.90418243408203, 289.376220703125, 771.309326171875, 143.67971801757812, -191.17684936523438, 473.3564147949219, 12.233894348144531, 350.04022216796875, 124.19622039794922, 1.3403854370117188, 69.42034912109375, 177.43324279785156, -26.1424560546875, -264.5263366699219, 421.055419921875, -101.02000427246094, 159.6629638671875, -20.764902114868164, 371.22760009765625, -8.024658203125, 293.0924987792969, 417.1112060546875, 65.73390197753906, 645.0853881835938, 527.6494750976562, 35.00096130371094, 124.12306213378906, 37.179019927978516, 217.72921752929688, 11.927322387695312, 173.7581329345703, 255.98818969726562, 128.2647705078125, 174.59732055664062, -13.742679595947266, -95.07789611816406, 290.20599365234375, 136.37091064453125, -16.886375427246094, 410.8887939453125, -39.16731262207031, 96.97066497802734, 398.1165771484375, 382.8810119628906, 429.2010803222656, 253.47201538085938, 242.37606811523438, 136.61627197265625, 112.19873046875, 310.2131042480469, 148.50186157226562, -154.52333068847656, 389.1415100097656, -42.9014892578125, 101.79747772216797], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000340.npy"}
|
|
{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 202.68289184570312, "std": 203.0848388671875, "min": -240.69960021972656, "p10": -37.34203262329101, "median": 200.6860122680664, "p90": 433.2851654052736, "max": 887.66845703125, "pos_frac": 0.84375, "sample": [887.66845703125, -42.2677001953125, -22.025604248046875, 0.7872905731201172, 337.1004333496094, 406.62054443359375, 513.353271484375, 444.7128601074219, 188.5908966064453, 125.7610092163086, 23.385175704956055, 276.8883972167969, 198.45310974121094, 225.70364379882812, 54.89387512207031, -56.19548034667969, -39.27607727050781, 322.29931640625, 57.887046813964844, 244.90432739257812, -116.7323989868164, 323.3980712890625, 393.75201416015625, 259.4657287597656, 91.76909637451172, 482.2713623046875, 126.65863800048828, 288.1066589355469, -12.740951538085938, -64.05955505371094, 118.14825439453125, 287.91546630859375, 515.2197265625, 214.74063110351562, 249.97850036621094, 304.43603515625, 130.12074279785156, 1.6627368927001953, -32.829261779785156, 361.04315185546875, 387.524658203125, 216.1929168701172, 61.582305908203125, 93.7472915649414, -240.69960021972656, 379.97119140625, 22.411331176757812, 198.1738739013672, -181.34979248046875, 318.4911804199219, 176.20034790039062, 278.2713928222656, 342.41754150390625, 6.034873962402344, 625.4176025390625, 76.03105163574219, 353.4926452636719, 404.32550048828125, 379.56256103515625, 97.74441528320312, 202.91891479492188, 119.91413116455078, 480.72796630859375, 101.03166961669922], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000341.npy"}
|
|
{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 176.73687744140625, "std": 209.7909698486328, "min": -362.284423828125, "p10": -43.63334503173826, "median": 142.14187622070312, "p90": 420.95442810058603, "max": 854.5184326171875, "pos_frac": 0.8125, "sample": [527.5220336914062, -362.284423828125, -137.39108276367188, 429.7696838378906, 75.14999389648438, 65.93312072753906, 193.88485717773438, 258.2779541015625, 240.7909698486328, 379.7669677734375, 75.56172180175781, 136.15626525878906, 190.90452575683594, 211.0812225341797, 438.7094421386719, 243.12677001953125, 70.62184143066406, 356.5232238769531, -98.24030303955078, -24.482437133789062, 275.2803649902344, 2.6540603637695312, 94.24282836914062, -24.69585418701172, -121.21524810791016, 205.6207275390625, 854.5184326171875, 318.9518127441406, 109.93775939941406, -119.83097839355469, 0.9055061340332031, 747.11279296875, 188.81234741210938, 94.88436889648438, -20.172042846679688, -51.749412536621094, 443.24053955078125, 400.385498046875, 468.5670166015625, 137.7115020751953, -16.82545280456543, 146.57225036621094, 334.3582763671875, -9.645355224609375, 302.4389953613281, 356.9744873046875, 118.2313232421875, 360.9515075683594, 146.73855590820312, 382.29656982421875, 390.93841552734375, 93.61463165283203, -113.24506378173828, 80.17080688476562, 294.4412841796875, 26.259193420410156, 130.36398315429688, 2.2092838287353516, 117.4314956665039, 95.35987854003906, 223.76998901367188, 366.1063232421875, 163.836181640625, 41.267967224121094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000342.npy"}
|
|
{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 212.34361267089844, "std": 171.44384765625, "min": -151.71258544921875, "p10": 6.061971855163575, "median": 188.50286102294922, "p90": 413.21359252929705, "max": 674.3978271484375, "pos_frac": 0.90625, "sample": [364.97430419921875, 160.81546020507812, 157.0832061767578, 134.7606201171875, 240.09002685546875, 5.58697509765625, 195.26107788085938, 286.24835205078125, 169.8380126953125, 592.6422119140625, 88.30409240722656, -26.01055908203125, 181.11456298828125, 53.6302490234375, 168.05889892578125, 54.496150970458984, 81.511962890625, 238.17410278320312, 215.48294067382812, 549.3449096679688, 189.1712188720703, 308.80511474609375, 90.08515930175781, 341.4937438964844, 318.61749267578125, 289.9034423828125, 78.11634826660156, 105.09249877929688, 646.82568359375, 174.45216369628906, -34.10112762451172, 78.84625244140625, 320.7911682128906, 58.4468994140625, 216.0804443359375, 233.8995361328125, 186.60641479492188, 48.84034729003906, 7.170297622680664, 324.24371337890625, -151.71258544921875, 281.7641296386719, 187.83450317382812, 297.3974609375, 300.0731201171875, 674.3978271484375, 356.58074951171875, 276.9975891113281, 302.9045104980469, 307.8440856933594, 203.78079223632812, 312.2771301269531, 88.506103515625, -25.020809173583984, 433.8875732421875, 343.1292419433594, 170.15184020996094, 133.10523986816406, 148.13519287109375, 171.68389892578125, -11.608528137207031, 482.5054626464844, 540.7960815429688, -130.213623046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000343.npy"}
|
|
{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 188.32395935058594, "std": 208.96810913085938, "min": -192.25997924804688, "p10": -19.443845558166494, "median": 148.7168960571289, "p90": 446.3431365966797, "max": 1145.20166015625, "pos_frac": 0.875, "sample": [32.77947235107422, 52.42774200439453, -142.51576232910156, -11.23573112487793, 102.80033874511719, 150.18563842773438, 147.24815368652344, 143.17678833007812, 43.98594665527344, 165.4463348388672, 46.431732177734375, 610.090087890625, 38.75530242919922, 214.2597198486328, 50.84593200683594, 177.96090698242188, 208.24000549316406, -70.66197204589844, -95.21196746826172, 238.9013671875, 209.60682678222656, 63.73765563964844, 134.8130645751953, 36.83269119262695, 157.23638916015625, 449.3088684082031, 451.239013671875, 439.423095703125, 113.71263885498047, 22.495452880859375, 408.311767578125, 136.91171264648438, 231.19723510742188, 80.55722045898438, 181.94000244140625, 98.14862060546875, 291.69073486328125, -22.96160888671875, 223.59341430664062, 36.46195983886719, 457.3851013183594, 298.40374755859375, 254.8904266357422, 127.83110809326172, 81.03043365478516, 1145.20166015625, 394.4609680175781, 278.8125305175781, 66.43751525878906, 488.9674072265625, 16.830703735351562, 280.4999084472656, 191.87771606445312, 479.2765197753906, 114.056640625, -24.525955200195312, -131.91796875, 376.34503173828125, 430.8895568847656, -192.25997924804688, 284.76190185546875, 309.6109313964844, 114.37254333496094, 361.32843017578125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000344.npy"}
|
|
{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 195.83944702148438, "std": 203.5675811767578, "min": -276.3074951171875, "p10": -28.511672019958493, "median": 172.6300811767578, "p90": 462.46964111328145, "max": 720.0210571289062, "pos_frac": 0.84375, "sample": [35.29167938232422, 483.7703552246094, 190.68511962890625, 255.66720581054688, 92.64871978759766, -132.23672485351562, 546.376953125, -82.16838073730469, 172.54998779296875, 361.34124755859375, -6.252370834350586, 202.4067840576172, 111.6646728515625, 319.4963073730469, 412.7679748535156, 131.39224243164062, -215.23751831054688, 207.4205322265625, 35.48142623901367, 159.41383361816406, 156.08383178710938, 334.052001953125, 64.10356140136719, 290.9819030761719, 225.06768798828125, 338.3023681640625, 111.94202423095703, 346.0053405761719, 125.31488037109375, 160.11622619628906, 55.357421875, 673.89697265625, 188.096435546875, 96.94828796386719, 63.14501953125, 409.7292785644531, 358.265380859375, 407.454345703125, 270.1482849121094, 320.58319091796875, 294.4815673828125, 720.0210571289062, 115.34332275390625, 75.02096557617188, 308.41656494140625, 265.4879455566406, 4.785053253173828, 639.095458984375, 133.30892944335938, 360.0940246582031, -47.93901824951172, -26.48053741455078, 172.71017456054688, 503.56292724609375, -10.002782821655273, -128.70098876953125, -29.382158279418945, 77.60659790039062, 521.41552734375, 159.00595092773438, 173.27505493164062, 227.63043212890625, -276.3074951171875, 23.201950073242188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000345.npy"}
|
|
{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 187.85757446289062, "std": 204.98841857910156, "min": -473.19842529296875, "p10": -39.55287895202635, "median": 204.72759246826172, "p90": 476.7735626220704, "max": 670.4857177734375, "pos_frac": 0.8125, "sample": [241.03843688964844, 136.85499572753906, 36.38878631591797, 300.58160400390625, 497.3003234863281, 315.6463928222656, 584.0179443359375, 198.17750549316406, 201.80508422851562, 670.4857177734375, -473.19842529296875, 115.67924499511719, 267.538330078125, 13.782047271728516, 246.65011596679688, 424.1880798339844, -2.142669677734375, 285.636474609375, 582.4278564453125, 8.523469924926758, 283.24102783203125, 149.03524780273438, -83.31048583984375, 396.69512939453125, 242.21353149414062, 267.1858215332031, 277.1717834472656, 147.760498046875, 293.57000732421875, 108.19808959960938, 16.969999313354492, -92.0804672241211, 165.9896697998047, 98.60862731933594, 494.0578918457031, 207.6501007080078, 235.80470275878906, 119.28128051757812, 271.4190368652344, -47.51133728027344, -14.954780578613281, 515.46142578125, 257.53216552734375, 489.7489013671875, 114.31382751464844, 240.7838897705078, 321.48504638671875, 319.9497375488281, 7.903194427490234, -8.09444808959961, 232.643310546875, -20.983142852783203, 45.051849365234375, -128.74002075195312, 117.42094421386719, 435.79949951171875, 96.18994140625, 153.4677276611328, 446.4977722167969, -151.48971557617188, -13.420654296875, -128.71463012695312, 238.2831268310547, 253.41806030273438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000346.npy"}
|
|
{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 189.44580078125, "std": 219.36105346679688, "min": -271.8494873046875, "p10": -82.34122238159179, "median": 153.7213134765625, "p90": 481.44636230468757, "max": 689.469482421875, "pos_frac": 0.8125, "sample": [-234.1494140625, 299.02764892578125, 359.56158447265625, 327.3651428222656, 87.98809051513672, 34.62853240966797, 140.12680053710938, 253.8017120361328, 491.6143798828125, 10.477348327636719, 300.5384521484375, 30.877342224121094, 111.11158752441406, -33.37158203125, 328.13604736328125, -211.58555603027344, -271.8494873046875, -53.700714111328125, 668.5748291015625, 118.30663299560547, 378.2298278808594, 49.57413101196289, 141.3583984375, 305.57708740234375, -84.78436279296875, 486.0865478515625, 502.6247863769531, 216.07464599609375, -24.875282287597656, 499.24456787109375, 321.3997802734375, 399.77081298828125, 317.00604248046875, -4.570777893066406, 198.21316528320312, 43.782752990722656, 440.1531982421875, 240.8232421875, 103.85601806640625, -113.33326721191406, 166.084228515625, 268.5732116699219, 101.80205535888672, 292.598876953125, 100.83463287353516, 122.44903564453125, 81.409423828125, 51.08159637451172, 689.469482421875, 66.53931427001953, 293.28033447265625, 470.6192626953125, 205.55332946777344, 674.52880859375, -83.18172454833984, 118.03360748291016, 213.89459228515625, -185.7431640625, 387.7513122558594, 66.6302261352539, 415.5589294433594, -80.38005065917969, 94.04142761230469, 419.4124755859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000347.npy"}
|
|
{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 163.54885864257812, "std": 204.6363067626953, "min": -216.6727752685547, "p10": -68.41045303344725, "median": 150.00746154785156, "p90": 419.68670043945326, "max": 848.5498046875, "pos_frac": 0.8125, "sample": [273.0462341308594, 30.98937225341797, 158.35678100585938, 323.7279052734375, 308.0740661621094, 6.880699157714844, 31.099647521972656, 325.8985290527344, 289.19183349609375, -201.0301971435547, 80.76518249511719, 77.89337158203125, 522.8299560546875, 199.20828247070312, 160.4784393310547, 258.69696044921875, 15.116533279418945, 374.9752197265625, 119.12835693359375, 79.421875, 280.34735107421875, 189.28631591796875, -32.03612518310547, 848.5498046875, 391.2555236816406, 275.94720458984375, -32.40081024169922, 255.02096557617188, 5.718732833862305, 128.22613525390625, 15.822006225585938, -115.30516052246094, -60.943115234375, 660.328369140625, 431.8714904785156, 238.58807373046875, 264.3684997558594, 151.36366271972656, 227.8526153564453, 8.344343185424805, -216.6727752685547, 349.26483154296875, -47.20752716064453, 292.2994079589844, 228.67324829101562, 222.90362548828125, -71.6107406616211, 9.545400619506836, 490.34356689453125, 195.6953887939453, 111.79584503173828, -2.7086029052734375, 453.8297119140625, -102.59583282470703, 320.037353515625, 148.65126037597656, 27.288551330566406, 0.8064002990722656, 24.123565673828125, 517.6527099609375, 94.3206787109375, 35.23736572265625, -92.24894714355469, -89.25335693359375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000348.npy"}
|
|
{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 175.56564331054688, "std": 238.91647338867188, "min": -341.10931396484375, "p10": -83.52045516967772, "median": 119.17279052734375, "p90": 461.00320739746104, "max": 937.6436157226562, "pos_frac": 0.8125, "sample": [10.370262145996094, -341.10931396484375, 360.2562255859375, 126.92389678955078, 84.74098205566406, 85.51828002929688, 350.4762268066406, -88.17509460449219, 211.75302124023438, 349.3133850097656, 108.09515380859375, 391.59478759765625, 161.14398193359375, 59.989112854003906, 35.863136291503906, 38.886253356933594, -24.494726181030273, 21.60289764404297, 245.55047607421875, -106.2430191040039, 388.2311096191406, 299.4063415527344, 239.60354614257812, 20.74107551574707, 351.82440185546875, -24.281055450439453, 352.4227294921875, -72.65962982177734, 111.42168426513672, 79.41600036621094, 687.0484619140625, 25.47748565673828, -26.624706268310547, 304.0035400390625, 215.68234252929688, -295.0578308105469, -180.31182861328125, 106.50591278076172, 511.49072265625, 50.27609634399414, 580.1536865234375, 26.991058349609375, -212.43212890625, 51.58189392089844, 439.5468444824219, 560.9055786132812, 701.693359375, 141.43026733398438, 470.19879150390625, 178.29470825195312, 316.025390625, 406.95294189453125, -36.941993713378906, 211.96490478515625, 199.04933166503906, 166.10769653320312, -105.20760345458984, 68.04060363769531, 80.44743347167969, 401.1544494628906, 311.9935302734375, 50.398284912109375, 63.53668212890625, 937.6436157226562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000349.npy"}
|
|
{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 184.57717895507812, "std": 222.1927947998047, "min": -443.83990478515625, "p10": -110.71248779296874, "median": 188.60286712646484, "p90": 476.28242797851567, "max": 631.088134765625, "pos_frac": 0.828125, "sample": [631.088134765625, -350.5254211425781, 84.62288665771484, 315.3683166503906, 259.450927734375, 201.046142578125, 409.35357666015625, 53.704345703125, -57.12187194824219, 81.47572326660156, 80.12290954589844, 370.9409484863281, -301.0527038574219, 131.7213897705078, 187.6881103515625, 61.284332275390625, 606.018798828125, 355.9755859375, 585.4705200195312, 285.617919921875, 197.89227294921875, 189.5176239013672, -34.76122283935547, 388.3205261230469, 177.14675903320312, 561.82177734375, 130.4931640625, -109.87094116210938, 182.31866455078125, 122.78678131103516, 504.1956787109375, 139.57696533203125, 174.87307739257812, 213.81838989257812, 299.8399658203125, 279.4658203125, 147.59506225585938, 235.61180114746094, -148.24966430664062, 480.87066650390625, 225.95091247558594, 465.5765380859375, 247.57839965820312, -79.6903076171875, 102.85123443603516, 194.28875732421875, 536.4915771484375, 307.3731384277344, 153.25140380859375, 205.23773193359375, -443.83990478515625, 418.176513671875, 68.87248229980469, 239.97311401367188, 93.05162811279297, 144.48306274414062, -116.78276062011719, -111.07315063476562, 75.91402435302734, 198.64312744140625, 389.15185546875, 112.1889419555664, -144.41590881347656, 404.17279052734375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000350.npy"}
|
|
{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 213.35069274902344, "std": 245.56260681152344, "min": -310.81256103515625, "p10": -50.55796356201169, "median": 190.7939224243164, "p90": 578.3319091796875, "max": 837.80712890625, "pos_frac": 0.8125, "sample": [100.21163940429688, 272.38494873046875, 399.92083740234375, 743.7130126953125, 837.80712890625, 141.03285217285156, 355.7484130859375, 98.98612976074219, 307.25372314453125, 273.31011962890625, 124.97216796875, 194.6275177001953, 650.237548828125, 652.0673217773438, 230.65135192871094, 343.3930969238281, 345.5249938964844, 505.88238525390625, 571.81640625, -5.019475936889648, -273.6081848144531, 310.86578369140625, 137.03292846679688, 203.24209594726562, 168.28387451171875, 304.14385986328125, 374.2923583984375, 223.47885131835938, -310.81256103515625, -29.143844604492188, -86.27349853515625, 185.54510498046875, -144.31793212890625, 143.70761108398438, 32.120147705078125, 665.6103515625, 26.479293823242188, 152.11236572265625, 172.5621337890625, -23.760353088378906, -59.735443115234375, 366.9223327636719, -11.056072235107422, 630.823486328125, 2.663421630859375, 28.69814109802246, 187.6687469482422, 0.469451904296875, 74.295654296875, -251.0501251220703, -17.995492935180664, 498.53350830078125, 143.18603515625, 234.063720703125, 581.124267578125, -87.45091247558594, 253.39122009277344, 553.5975341796875, 279.7541809082031, 191.03280639648438, 190.55503845214844, 283.2584228515625, 197.6795196533203, 7.932594299316406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000351.npy"}
|
|
{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 141.55435180664062, "std": 224.2336883544922, "min": -315.75177001953125, "p10": -132.87391510009763, "median": 121.18124389648438, "p90": 442.5471008300782, "max": 841.8489379882812, "pos_frac": 0.75, "sample": [317.6070251464844, 338.135986328125, 134.28997802734375, 422.7026062011719, -245.36184692382812, 82.404052734375, 121.9917984008789, 344.272705078125, 43.03419494628906, 216.1992950439453, 510.978515625, 454.56768798828125, -40.23411178588867, 68.16570281982422, 162.0500946044922, -32.93226623535156, -169.88543701171875, 841.8489379882812, 196.1822967529297, -140.24380493164062, 161.81134033203125, 596.4957275390625, 144.0937957763672, 380.2152099609375, 51.40803527832031, 63.10784149169922, -107.38421630859375, -197.23944091796875, 73.86017608642578, 77.64389038085938, 67.11135864257812, 448.3805847167969, 17.499135971069336, 1.7609786987304688, 453.6173400878906, 158.400390625, -315.75177001953125, 120.37068939208984, 213.553955078125, 365.9869384765625, 175.1040802001953, 59.13641357421875, 193.46324157714844, -195.19857788085938, -37.60332107543945, -75.50021362304688, -152.85958862304688, 428.9356384277344, 354.19732666015625, -115.67750549316406, 196.86244201660156, -63.692955017089844, -111.44588470458984, 353.01971435546875, 216.36343383789062, 194.66851806640625, 389.32196044921875, 27.79126739501953, 525.015625, 118.92232513427734, 15.567352294921875, -40.26463317871094, 146.031005859375, 56.604862213134766], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000352.npy"}
|
|
{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 177.51611328125, "std": 226.57310485839844, "min": -347.21978759765625, "p10": -66.97606124877927, "median": 137.82999420166016, "p90": 510.32151794433594, "max": 668.3455810546875, "pos_frac": 0.765625, "sample": [252.65353393554688, 299.8127746582031, 2.8723983764648438, 498.88323974609375, 174.81903076171875, 309.08380126953125, 49.28352355957031, -92.11824035644531, 459.0406494140625, 164.73193359375, 108.49880981445312, -31.10688018798828, 158.14512634277344, 502.8073425292969, 82.91152954101562, 370.5792236328125, -117.64335632324219, -6.64715576171875, 565.0537109375, 545.0899047851562, 142.57411193847656, 314.46185302734375, 28.418331146240234, 128.34280395507812, -347.21978759765625, 343.765869140625, -197.72598266601562, -25.58854103088379, -49.95575714111328, 235.56719970703125, -34.57084655761719, 73.97872161865234, 349.33782958984375, 193.8842010498047, 46.556488037109375, -74.27047729492188, 90.56390380859375, -106.89111328125, 584.48095703125, 1.638448715209961, 10.953582763671875, 587.7877807617188, -31.96338653564453, 133.08587646484375, 121.51744842529297, 195.2616729736328, 282.7513732910156, 146.4448699951172, 6.190521240234375, 511.1572570800781, 261.0230407714844, -9.536605834960938, 508.3714599609375, 322.7417907714844, 668.3455810546875, 644.1017456054688, -129.1893768310547, 292.6188049316406, 481.95941162109375, -28.468351364135742, 86.70442962646484, 93.79145812988281, 55.07818603515625, 156.20346069335938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000353.npy"}
|
|
{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 198.263916015625, "std": 183.3903350830078, "min": -125.3340835571289, "p10": -3.3382881164550766, "median": 177.65589904785156, "p90": 439.71649169921886, "max": 673.873291015625, "pos_frac": 0.875, "sample": [292.08477783203125, 377.08563232421875, 195.5151824951172, 24.424163818359375, 388.1011657714844, 181.59710693359375, 537.1346435546875, -92.26405334472656, 209.82000732421875, -91.01945495605469, 22.70793914794922, -125.3340835571289, 143.75572204589844, 673.873291015625, 415.54931640625, 205.74008178710938, -1.9804306030273438, 282.3758239746094, 450.0738525390625, 259.8639831542969, 2.0024795532226562, 324.3294677734375, 133.38525390625, 136.83660888671875, -47.464813232421875, 105.98391723632812, 295.4483337402344, 224.07997131347656, 90.71260833740234, 323.03033447265625, 540.3234252929688, -114.98143005371094, 276.1345520019531, 267.7373962402344, 134.88262939453125, 78.54811096191406, 40.480125427246094, 413.2838439941406, 173.71469116210938, 78.38148498535156, 382.9913330078125, 47.34955596923828, 268.6777648925781, 136.15191650390625, 122.79981231689453, 246.6210174560547, 79.72444915771484, 624.7675170898438, 354.45294189453125, -3.92022705078125, 67.81633758544922, 383.3917541503906, 220.6994171142578, 60.199066162109375, -70.90723419189453, 521.5413818359375, 97.6789779663086, 275.81365966796875, 272.2073669433594, 108.72441101074219, 461.57086181640625, 110.15786743164062, 75.21591186523438, 19.21181869506836], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000354.npy"}
|
|
{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 197.23574829101562, "std": 190.82659912109375, "min": -196.633544921875, "p10": -14.477505493164058, "median": 180.28435516357422, "p90": 463.16187744140626, "max": 756.0557861328125, "pos_frac": 0.859375, "sample": [136.9807586669922, 465.2252197265625, 226.59609985351562, 504.1366882324219, 117.96712493896484, 225.8936004638672, 209.979736328125, 756.0557861328125, -136.242919921875, 253.02830505371094, 196.71408081054688, 491.80743408203125, 14.354988098144531, -9.354568481445312, 377.99810791015625, 458.347412109375, -16.673049926757812, 574.178466796875, 94.1607894897461, 133.72341918945312, 326.91119384765625, 220.54248046875, -38.54335021972656, 18.533061981201172, 285.53125, 494.07647705078125, 393.44427490234375, 173.19290161132812, 122.97038269042969, 8.300094604492188, -37.74776840209961, 149.65423583984375, 128.0942840576172, -196.633544921875, 220.05206298828125, -126.20106506347656, 308.46710205078125, 401.96063232421875, 281.6450500488281, 617.194580078125, 99.4437026977539, 259.06243896484375, 83.39077758789062, 103.98428344726562, 199.50289916992188, 253.642822265625, 101.13764190673828, 379.5067443847656, 70.71652221679688, 151.32955932617188, 453.2451171875, 252.03955078125, 177.1311492919922, 200.2105712890625, 65.06568908691406, 4.29736328125, 97.83351135253906, -144.14218139648438, 183.43756103515625, -5.443683624267578, 88.98159790039062, 290.89874267578125, 275.838134765625, 155.65582275390625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000355.npy"}
|
|
{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 118.54216003417969, "std": 254.41921997070312, "min": -520.3167724609375, "p10": -173.50719299316407, "median": 102.27500534057617, "p90": 433.98465881347664, "max": 725.140625, "pos_frac": 0.65625, "sample": [164.64901733398438, -16.566749572753906, 338.1822204589844, 141.24807739257812, -21.729755401611328, -133.8720245361328, -60.25750732421875, 225.19009399414062, -110.33612060546875, 205.080322265625, 370.36090087890625, 5.477817535400391, 691.2133178710938, 416.0426940917969, -82.18307495117188, 550.7571411132812, -175.48158264160156, -168.90028381347656, -520.3167724609375, -155.97586059570312, 25.179954528808594, 564.6913452148438, -56.07044982910156, 87.26776123046875, 273.7901306152344, 441.674072265625, -4.565208435058594, 351.9518127441406, -263.65606689453125, 53.20616912841797, -166.4306182861328, 615.5303344726562, 125.9586181640625, -206.26522827148438, 316.83905029296875, 90.02568054199219, 278.6567077636719, 324.97796630859375, -215.46615600585938, 46.17726135253906, 14.341297149658203, -90.4990005493164, 168.37600708007812, 182.60260009765625, 7.782880783081055, 280.3623352050781, 315.2568054199219, -146.30743408203125, 558.9448852539062, 229.63223266601562, 155.33270263671875, -24.494476318359375, -204.4442901611328, -40.771522521972656, 190.67373657226562, 163.89956665039062, 105.69380187988281, 242.96017456054688, -341.7368469238281, 725.140625, 306.4351806640625, 341.133056640625, 1.4725875854492188, 98.85620880126953], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000356.npy"}
|
|
{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 185.00271606445312, "std": 201.10289001464844, "min": -310.8899841308594, "p10": -39.815643310546875, "median": 166.0202407836914, "p90": 440.256185913086, "max": 718.9556884765625, "pos_frac": 0.84375, "sample": [360.56488037109375, 444.3741455078125, 72.52256774902344, 539.5935668945312, 420.2659606933594, 33.20582580566406, 92.24702453613281, 375.19140625, 120.85992431640625, 430.6476135253906, 292.2615966796875, 55.789024353027344, -97.85355377197266, 405.38671875, -38.51446533203125, 141.755859375, 201.43600463867188, 59.62847900390625, 255.8491668701172, -104.65968322753906, 606.0563354492188, 13.052299499511719, 192.66949462890625, 30.534469604492188, -310.8899841308594, 43.04759216308594, 236.3029022216797, 153.20535278320312, -49.7384033203125, 490.94439697265625, 395.8956298828125, -49.41777801513672, 445.6336364746094, -116.23260498046875, 309.50494384765625, 3.778911590576172, 336.1401672363281, 718.9556884765625, 199.9852294921875, 297.871337890625, 9.608184814453125, -33.576087951660156, 285.6091003417969, 354.99749755859375, 16.00391387939453, 121.86424255371094, 86.00932312011719, 270.544921875, -12.309452056884766, 182.42947387695312, -40.373291015625, 286.3350830078125, 218.8153076171875, 29.730911254882812, 282.7139587402344, 235.58843994140625, 119.1277084350586, 336.17803955078125, 22.453643798828125, 178.8351287841797, 90.05552673339844, 615.9964599609375, 54.195465087890625, 121.48858642578125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000357.npy"}
|
|
{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 164.02752685546875, "std": 204.3319549560547, "min": -331.70263671875, "p10": -33.44535140991211, "median": 107.10916519165039, "p90": 412.77265625, "max": 751.4151611328125, "pos_frac": 0.796875, "sample": [118.32666015625, 449.86126708984375, 39.03825378417969, 214.6819305419922, 198.1431121826172, 285.9710388183594, 55.16377258300781, 213.25921630859375, 106.97941589355469, 99.50039672851562, -34.36072540283203, -0.182586669921875, -63.562835693359375, -25.772476196289062, 28.081937789916992, 684.869384765625, 1.1450881958007812, 352.12847900390625, 56.597808837890625, 69.71908569335938, -4.323736190795898, -40.62879943847656, 10.726844787597656, 67.33935546875, 93.82110595703125, -331.70263671875, 279.64215087890625, -169.440185546875, 680.671875, 390.7616271972656, 413.3328857421875, 212.15054321289062, 294.80450439453125, -9.604225158691406, 536.9999389648438, 178.16079711914062, 56.929054260253906, 97.08078002929688, -61.409385681152344, 216.1298828125, 266.21527099609375, 334.5497741699219, -79.76786804199219, 261.6557312011719, 339.86895751953125, -27.52777099609375, 332.6504821777344, 530.845703125, 21.117298126220703, 107.04191589355469, 288.0030212402344, 126.51895904541016, 38.38397216796875, 3.2248077392578125, 411.4654541015625, 751.4151611328125, 93.68889617919922, 107.1764144897461, 246.23191833496094, -31.309478759765625, 192.41514587402344, 151.85845947265625, 189.6858367919922, 81.32310485839844], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000358.npy"}
|
|
{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 190.79214477539062, "std": 225.0001220703125, "min": -172.94642639160156, "p10": -84.2192153930664, "median": 144.40309143066406, "p90": 489.35569458007825, "max": 830.1753540039062, "pos_frac": 0.765625, "sample": [-172.94642639160156, 234.09933471679688, 140.59304809570312, 383.0003967285156, -144.043701171875, 588.9019775390625, -96.28536987304688, 830.1753540039062, 152.7950439453125, -83.50448608398438, -84.52552795410156, -108.78128814697266, 353.0966491699219, 301.28521728515625, -106.84207153320312, 41.50904846191406, 313.72259521484375, -9.488983154296875, 426.66351318359375, -57.218666076660156, 164.94143676757812, 171.96319580078125, 460.08331298828125, 69.28121185302734, 62.83552932739258, 152.2469024658203, 122.31412506103516, 166.58018493652344, 95.51727294921875, 110.0149154663086, 411.5025634765625, 288.5966491699219, 816.275146484375, 248.9041290283203, -65.94563293457031, 60.083717346191406, 241.82626342773438, 96.90229034423828, -99.37592315673828, 550.3806762695312, 127.24676513671875, 353.6133728027344, 176.3975067138672, 302.04888916015625, -14.808746337890625, 144.529541015625, 423.32293701171875, 160.612060546875, 94.14354705810547, 501.9010009765625, 45.04334259033203, -13.465499877929688, -13.400436401367188, 251.697265625, 144.27664184570312, 514.7614135742188, -10.403533935546875, 140.2804412841797, 645.050537109375, 457.40234375, 140.1136474609375, 410.3743896484375, 75.98945617675781, 126.8359146118164], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000359.npy"}
|
|
{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 157.87086486816406, "std": 167.21470642089844, "min": -160.23431396484375, "p10": -47.56783885955808, "median": 125.11323928833008, "p90": 389.2258605957031, "max": 518.4388427734375, "pos_frac": 0.8125, "sample": [37.069000244140625, 392.24188232421875, 38.182518005371094, 137.35171508789062, -26.417444229125977, 338.4249267578125, 99.2520523071289, 179.27651977539062, 322.3143310546875, 100.49998474121094, 121.45694732666016, 296.3802185058594, -56.632293701171875, 251.5347442626953, 509.2998962402344, 136.6355743408203, 26.584941864013672, -22.519393920898438, 367.864501953125, 89.52090454101562, 339.7722473144531, 202.2115478515625, -64.5034408569336, 248.12416076660156, 58.946163177490234, 297.1810302734375, 382.1884765625, 95.94206237792969, 40.74257278442383, 395.8372497558594, 479.9524841308594, 307.8958740234375, 403.82086181640625, -79.92161560058594, 32.59758758544922, 107.71812438964844, 192.3824462890625, 186.78021240234375, 424.88177490234375, -16.979095458984375, 92.10968017578125, 518.4388427734375, -160.23431396484375, -24.983646392822266, -21.446197509765625, 78.218994140625, -86.24468231201172, 129.6760711669922, 108.25862884521484, -95.36134338378906, 14.063232421875, 263.55694580078125, 53.715171813964844, 308.0213317871094, 224.60647583007812, 243.92303466796875, 248.45018005371094, 3.4571151733398438, 128.76953125, 365.35614013671875, 46.10096740722656, -75.0683364868164, 42.898155212402344, 323.56121826171875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000360.npy"}
|
|
{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 159.09165954589844, "std": 213.7990264892578, "min": -329.2055969238281, "p10": -70.13044433593747, "median": 150.04931640625, "p90": 440.59528503417977, "max": 661.807861328125, "pos_frac": 0.765625, "sample": [40.183250427246094, 152.13723754882812, 35.29719161987305, 276.5382080078125, -41.47901153564453, -111.43933868408203, 27.231101989746094, 181.98243713378906, 519.690185546875, 157.5511932373047, 181.6905517578125, 191.08456420898438, -329.2055969238281, 408.6099853515625, -32.84455871582031, -34.41272735595703, 192.99838256835938, 143.50990295410156, -219.65179443359375, 3.114988327026367, 227.25753784179688, -118.39403533935547, 75.5657958984375, 20.616397857666016, 221.5929412841797, 661.807861328125, 225.38180541992188, 406.7115478515625, 99.33721923828125, 368.9809265136719, -13.445526123046875, 4.159809112548828, 177.3715362548828, 69.87551879882812, 475.46429443359375, 424.07989501953125, -31.973312377929688, 617.8304443359375, 644.3159790039062, 340.83905029296875, 223.775634765625, 256.9859924316406, 494.3022766113281, 346.47784423828125, 147.96139526367188, 183.02951049804688, 51.90068817138672, 147.82998657226562, -15.219390869140625, -82.40962982177734, 20.383468627929688, -39.11032485961914, 447.6733093261719, -2.3167877197265625, 285.8370361328125, 317.902099609375, 143.71966552734375, -103.5633773803711, 223.61886596679688, 6.1529693603515625, 74.77264404296875, -286.1148681640625, 305.684326171875, 392.63153076171875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000361.npy"}
|
|
{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 236.09628295898438, "std": 214.4419708251953, "min": -100.63412475585938, "p10": -19.468302917480464, "median": 212.3677978515625, "p90": 493.04087219238284, "max": 1021.8084716796875, "pos_frac": 0.875, "sample": [78.01522827148438, 214.0638427734375, 494.200927734375, 426.3573913574219, 227.1336669921875, 32.19715118408203, 1021.8084716796875, 237.582275390625, 124.30239868164062, 463.3244934082031, 440.9626770019531, 87.14505767822266, 69.9158706665039, 206.50588989257812, 224.10911560058594, 403.6877136230469, 171.46835327148438, -21.367345809936523, 98.48648071289062, 348.28155517578125, 368.93487548828125, 71.66964721679688, 39.751243591308594, 3.04656982421875, -95.82890319824219, -15.037202835083008, 142.22640991210938, 223.20323181152344, -96.0925521850586, 644.4024658203125, 517.391845703125, 127.96067810058594, -47.21343994140625, 279.8666076660156, 254.4792022705078, 192.525634765625, 490.3340759277344, -33.900779724121094, 210.57554626464844, 473.8385009765625, 215.94508361816406, 44.84394836425781, 611.1069946289062, 398.1068420410156, 150.68994140625, 118.16165924072266, 330.6957092285156, 210.6717529296875, 40.76783752441406, 687.3897094726562, 197.942626953125, 496.76812744140625, 419.5776672363281, 265.71942138671875, 200.13711547851562, 393.3166809082031, 408.4583740234375, -80.17218017578125, 198.87876892089844, 274.0697326660156, 239.96697998046875, 271.2502136230469, 16.189605712890625, -100.63412475585938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000362.npy"}
|
|
{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 243.86749267578125, "std": 220.62222290039062, "min": -249.14703369140625, "p10": -7.535153198242178, "median": 232.5226287841797, "p90": 511.72539672851565, "max": 944.8282470703125, "pos_frac": 0.890625, "sample": [-249.14703369140625, 329.23846435546875, 502.2667541503906, 197.37823486328125, 104.49314880371094, 192.36837768554688, 235.922119140625, 254.62579345703125, 173.7934112548828, 61.37609100341797, 334.9991149902344, 183.1156768798828, 346.8209533691406, 136.4169921875, 292.41473388671875, 718.227294921875, -69.34461975097656, 489.8294982910156, 110.28370666503906, 649.72021484375, 395.7750549316406, 156.72515869140625, 239.48562622070312, 341.4486999511719, 280.5183410644531, 370.17236328125, 944.8282470703125, 313.78875732421875, 26.29876708984375, 527.087890625, 121.79176330566406, 206.18971252441406, 50.0421142578125, 187.29006958007812, 431.3257141113281, 74.38416290283203, 211.21983337402344, -11.575471878051758, 513.46923828125, 443.4548034667969, 389.1761474609375, -71.9702377319336, 30.97968292236328, 399.872802734375, -40.66625213623047, 335.87677001953125, 507.65643310546875, 1.66082763671875, 239.091552734375, 453.9138488769531, 1.2259445190429688, 113.27655029296875, -95.67159271240234, 358.08880615234375, 235.2762451171875, -11.289909362792969, 31.340293884277344, 229.76901245117188, 35.925655364990234, 112.80487823486328, 633.1737670898438, 603.2291870117188, 36.25200653076172, 260.0074768066406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000363.npy"}
|
|
{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 183.8577880859375, "std": 220.60147094726562, "min": -347.85784912109375, "p10": -41.70649337768554, "median": 182.28695678710938, "p90": 452.72578125000007, "max": 798.7374267578125, "pos_frac": 0.84375, "sample": [327.1070861816406, 278.6216735839844, -192.6000213623047, 139.66876220703125, -347.85784912109375, 433.30419921875, -246.65875244140625, 285.6312255859375, 263.6938171386719, 798.7374267578125, 160.76353454589844, 533.3915405273438, 320.7978820800781, 313.69451904296875, -31.804664611816406, 50.11299133300781, 33.901824951171875, -175.66363525390625, 295.6961669921875, -45.95013427734375, 76.998046875, 358.2541809082031, 189.38119506835938, 403.1890869140625, 245.0087890625, 39.9862060546875, 18.86483383178711, 461.04931640625, 208.23817443847656, 229.633056640625, -17.108016967773438, 578.6686401367188, 121.4780044555664, 107.55899047851562, 115.1683120727539, 113.24345397949219, 307.7591247558594, 231.93801879882812, 345.4773864746094, -82.49819946289062, 50.029014587402344, 498.3031005859375, 511.29248046875, 67.37547302246094, 190.5806427001953, 15.275260925292969, 30.82350730895996, 298.6021728515625, 416.37457275390625, 286.4593200683594, 55.07279586791992, 175.19271850585938, -26.31144905090332, 143.3843994140625, 722.6990966796875, 81.67343139648438, 204.150146484375, 272.5952453613281, 4.705617904663086, 254.00027465820312, 77.50958251953125, 130.82363891601562, 335.91192626953125, -276.5007629394531], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000364.npy"}
|
|
{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 181.22683715820312, "std": 216.1465301513672, "min": -191.37342834472656, "p10": -23.303179740905748, "median": 139.76043701171875, "p90": 462.95158386230503, "max": 1036.482177734375, "pos_frac": 0.875, "sample": [176.88760375976562, 68.48416900634766, 63.25482177734375, 143.12591552734375, 83.37808227539062, 103.62908172607422, 343.5643310546875, 15.860694885253906, 193.8466796875, 24.324432373046875, 168.48414611816406, 355.01544189453125, -28.740007400512695, 191.69522094726562, 15.035751342773438, -10.61724853515625, 73.78941345214844, 744.59130859375, 515.5985717773438, 240.20803833007812, 60.13300323486328, 70.0102310180664, 38.984825134277344, 11.823516845703125, 128.16726684570312, 34.0499267578125, 94.05471801757812, 103.10467529296875, 164.9620361328125, 157.33795166015625, -88.81428527832031, -77.40399169921875, 136.39495849609375, 34.21438217163086, 370.02001953125, 525.3074340820312, 176.741943359375, 71.85406494140625, -86.9599609375, 293.78802490234375, 224.7680206298828, -150.7305908203125, 189.70352172851562, 17.3253116607666, 51.89215850830078, 305.5632629394531, 234.23805236816406, 290.3426513671875, 262.2438659667969, -90.5086669921875, 54.824195861816406, 351.816650390625, 21.653589248657227, 284.92864990234375, 211.6525421142578, 238.31060791015625, 1036.482177734375, 498.33502197265625, 380.3902282714844, -191.37342834472656, 645.1158447265625, 560.310791015625, 116.82029724121094, 355.2260437011719], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000365.npy"}
|
|
{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 188.9411163330078, "std": 227.1842803955078, "min": -190.97695922851562, "p10": -101.37859573364257, "median": 168.894775390625, "p90": 471.3795013427735, "max": 751.4403686523438, "pos_frac": 0.78125, "sample": [-175.9010009765625, 153.2808837890625, 305.0510559082031, 97.67355346679688, -185.953857421875, 28.8608455657959, -78.69316864013672, 72.96124267578125, -1.51397705078125, 361.29833984375, 69.70414733886719, -190.97695922851562, 462.8883056640625, 586.5887451171875, 199.14938354492188, -36.08686828613281, 272.2559509277344, 8.515377044677734, 125.82274627685547, -102.90379333496094, 310.2875061035156, 382.4290771484375, 306.6880798339844, 35.17967987060547, 427.20941162109375, 751.4403686523438, 322.1816101074219, 98.53923797607422, 423.8553161621094, 186.49099731445312, 129.22265625, -38.1845703125, -123.55620574951172, 488.436767578125, 335.63140869140625, 300.1529541015625, 691.7294311523438, 342.5300598144531, 184.5086669921875, 121.92414855957031, 215.95452880859375, 131.01190185546875, 64.4608154296875, -172.0433349609375, 430.3348388671875, -97.8198013305664, 282.1839599609375, 22.311450958251953, 436.35772705078125, 475.0185852050781, 371.9583740234375, 199.15328979492188, 545.4425048828125, 236.69580078125, 644.6453247070312, 278.7792663574219, -63.67570495605469, 83.65406799316406, -92.0347900390625, 116.73031616210938, 121.6874008178711, 127.68317413330078, 264.52374267578125, -179.49974060058594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000366.npy"}
|
|
{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 164.744140625, "std": 186.0782928466797, "min": -320.3995666503906, "p10": -25.33668518066405, "median": 123.35981369018555, "p90": 438.38703918457037, "max": 542.203125, "pos_frac": 0.8125, "sample": [92.49388122558594, 115.98542785644531, 444.00494384765625, 70.80458068847656, 525.023681640625, 23.007719039916992, 412.11859130859375, 54.79672622680664, 443.43475341796875, 46.07544708251953, 33.60693359375, 8.41557502746582, -4.750244140625, 468.9006042480469, 358.0887756347656, 247.724609375, 305.43499755859375, 87.59043884277344, 209.73565673828125, -38.804481506347656, 228.41543579101562, 406.8138122558594, -30.49176788330078, -8.355140686035156, 221.36386108398438, 292.229248046875, 542.203125, 68.67799377441406, 494.81561279296875, 48.83135986328125, 426.6090393066406, 120.38633728027344, -1.5682334899902344, 295.64776611328125, 66.09791564941406, 185.60214233398438, 255.78285217285156, 318.043701171875, 295.20660400390625, 71.64385986328125, 184.37411499023438, -186.38348388671875, -117.70680236816406, 5.722597122192383, -34.21856689453125, -7.536279678344727, 29.536285400390625, 265.0156555175781, 146.9052276611328, 47.93353271484375, -83.9342269897461, -320.3995666503906, 126.33329010009766, 77.65132141113281, 266.22235107421875, 316.12823486328125, 352.5154724121094, 227.7364044189453, 399.41802978515625, 18.955459594726562, 148.73548889160156, -13.308158874511719, 449.27069091796875, 43.013671875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000367.npy"}
|
|
{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 228.497802734375, "std": 171.1933135986328, "min": -173.19830322265625, "p10": 2.7566841125488355, "median": 256.42962646484375, "p90": 411.0368316650391, "max": 761.466064453125, "pos_frac": 0.890625, "sample": [290.9034423828125, 342.77532958984375, 227.88601684570312, 286.50396728515625, 288.90252685546875, 437.99517822265625, 140.46627807617188, 10.047836303710938, 482.8217468261719, 261.12738037109375, 16.04733657836914, 319.74322509765625, 359.0921630859375, -21.89297866821289, 287.2846374511719, 304.3531188964844, 216.25704956054688, 225.67649841308594, -44.20313262939453, 343.1454162597656, 190.97808837890625, 155.2228240966797, 511.2516784667969, 265.9241943359375, 318.3013610839844, 83.14187622070312, 324.71697998046875, -0.36809539794921875, -10.46121597290039, 26.031822204589844, 63.93455505371094, 388.35797119140625, 277.9233093261719, 205.39178466796875, 256.20318603515625, 338.76910400390625, 257.97076416015625, 48.31330871582031, 167.8133087158203, -14.332725524902344, 256.65606689453125, 418.79168701171875, 352.23724365234375, 414.8958435058594, 166.17141723632812, 286.0031433105469, 616.20166015625, 23.771865844726562, -173.19830322265625, 106.57839965820312, 91.86585235595703, 402.032470703125, 325.1560363769531, 397.1111755371094, 354.0797424316406, 55.279388427734375, 183.21377563476562, 761.466064453125, 178.10946655273438, 251.38845825195312, 196.22442626953125, 26.33245849609375, -44.50278854370117, 347.97601318359375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000368.npy"}
|
|
{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 142.8871307373047, "std": 210.41445922851562, "min": -362.4417724609375, "p10": -100.76508789062498, "median": 125.9641227722168, "p90": 415.36763916015633, "max": 693.2059326171875, "pos_frac": 0.734375, "sample": [-90.2174072265625, -88.8642349243164, 607.5858154296875, -31.1586971282959, -192.2688751220703, 13.663358688354492, -5.489738464355469, 281.55950927734375, 42.733699798583984, 438.5783386230469, 205.73287963867188, -60.12640380859375, 174.61395263671875, 259.0145263671875, 255.02601623535156, 693.2059326171875, 399.9236755371094, 163.20974731445312, -2.918956756591797, 346.4435119628906, 277.2718811035156, 118.29716491699219, 135.45382690429688, 150.90621948242188, 253.23416137695312, 159.42401123046875, -337.52685546875, 232.33367919921875, 168.73892211914062, 106.14672088623047, 238.8164520263672, 301.3707275390625, 242.22433471679688, 36.20503234863281, 112.06619262695312, 121.18407440185547, 293.7740783691406, 74.07811737060547, -12.531234741210938, 373.5826721191406, 421.9864807128906, -122.62892150878906, 569.90576171875, 60.72502136230469, 371.3902587890625, 230.826416015625, 308.12261962890625, -147.58309936523438, -3.920989990234375, 459.7550048828125, 130.74417114257812, 98.93814086914062, -362.4417724609375, 74.0663070678711, 119.58385467529297, -105.2855224609375, -220.9964141845703, 21.131446838378906, 119.04215240478516, 462.01373291015625, 202.61904907226562, -28.648048400878906, 51.578243255615234, -21.443565368652344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000369.npy"}
|
|
{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 123.87529754638672, "std": 198.8511505126953, "min": -307.55987548828125, "p10": -143.22698059082032, "median": 137.33474731445312, "p90": 367.3718566894531, "max": 575.2557983398438, "pos_frac": 0.71875, "sample": [69.96967315673828, 286.4494323730469, 148.6827850341797, -181.0474090576172, 575.2557983398438, 169.38775634765625, 165.1727294921875, 229.52003479003906, 28.931989669799805, -56.059791564941406, 291.04095458984375, 246.67129516601562, 28.895843505859375, 161.114501953125, -126.00578308105469, 433.688232421875, 304.43865966796875, 123.59422302246094, 197.46469116210938, 511.0408630371094, 521.3067016601562, 95.29953002929688, 264.7774658203125, -37.06816864013672, 234.19363403320312, -11.288703918457031, 72.19757080078125, 150.00047302246094, -135.80355834960938, 154.52767944335938, 404.20257568359375, 78.01275634765625, -24.178085327148438, 269.44085693359375, -19.41707992553711, 260.207275390625, 226.640625, -146.408447265625, -278.20758056640625, 26.934797286987305, 119.53118896484375, 190.60784912109375, -213.3378143310547, 133.1142578125, 361.48248291015625, 131.8458251953125, 121.9547119140625, 299.767333984375, 242.67445373535156, -307.55987548828125, -68.42665100097656, -33.311920166015625, 116.08114624023438, 162.4761962890625, -183.57907104492188, -119.1560287475586, 17.141006469726562, -239.13082885742188, -27.60427474975586, 283.8094482421875, 497.4687805175781, 217.1429443359375, 141.55523681640625, 369.8958740234375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000370.npy"}
|
|
{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 179.04043579101562, "std": 217.3778076171875, "min": -420.6995849609375, "p10": -81.60954055786132, "median": 159.31957244873047, "p90": 474.1719207763673, "max": 692.4502563476562, "pos_frac": 0.78125, "sample": [284.8674621582031, 253.20819091796875, 488.0914611816406, 64.46135711669922, -420.6995849609375, 204.51553344726562, 486.75640869140625, 196.49151611328125, 596.3145141601562, 287.39306640625, 344.5718994140625, 30.91274070739746, 303.62835693359375, 233.47760009765625, -155.49526977539062, 114.29841613769531, 72.078857421875, 163.7374267578125, 447.4761962890625, 138.51358032226562, 23.57951545715332, -200.4441680908203, 421.6644287109375, -85.2906494140625, -111.87646484375, 154.90171813964844, 617.9540405273438, 359.97479248046875, -66.24195861816406, 102.90505981445312, 551.308349609375, -34.97483825683594, 246.71920776367188, 199.7805938720703, -73.0202865600586, -20.985885620117188, 352.6836242675781, -98.38089752197266, 166.41256713867188, 389.1556396484375, -34.305023193359375, 97.56014251708984, 485.6129455566406, 136.63978576660156, 26.822704315185547, 248.817626953125, 75.27520751953125, 367.79339599609375, 410.99139404296875, 3.0766143798828125, 312.4478759765625, 114.20604705810547, 87.94633483886719, 319.9197998046875, 179.69935607910156, 119.50692749023438, 120.35478210449219, 70.96987915039062, -108.37467956542969, 429.20166015625, 294.258544921875, 692.4502563476562, -14.411666870117188, -8.296722412109375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000371.npy"}
|
|
{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 220.14207458496094, "std": 255.7134552001953, "min": -242.57266235351562, "p10": -97.73456878662107, "median": 172.04896545410156, "p90": 592.1791076660157, "max": 924.1012573242188, "pos_frac": 0.78125, "sample": [316.1749267578125, 316.24993896484375, 499.70733642578125, -111.26472473144531, 42.12486267089844, 609.9451904296875, 550.7249145507812, 233.34442138671875, 233.18899536132812, -5.93377685546875, 924.1012573242188, 341.1419372558594, 108.5345458984375, 163.801513671875, -37.348480224609375, 111.0726547241211, 89.65748596191406, 129.8253173828125, 634.1285400390625, -167.08079528808594, 166.5216827392578, 186.43617248535156, 493.1499328613281, -127.9589614868164, 466.1246337890625, 185.41249084472656, 95.5709457397461, 136.70443725585938, 111.37335205078125, 129.95855712890625, 46.72772216796875, 254.36587524414062, 416.9358825683594, -66.99959564208984, 104.96084594726562, -59.33794403076172, 353.9625549316406, 651.6157836914062, 145.2396240234375, 746.912841796875, 405.2586975097656, 339.6337585449219, 187.091064453125, -242.57266235351562, 346.7663269042969, 465.4870300292969, 489.46649169921875, 193.6562957763672, -21.445632934570312, 278.7101745605469, 68.80227661132812, 260.37347412109375, 138.26023864746094, -47.28789520263672, 693.4873046875, 177.5762481689453, -13.556917190551758, 162.41741943359375, -110.90670013427734, 766.287353515625, -219.2583770751953, -178.31243896484375, 373.0224609375, 156.36468505859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000372.npy"}
|
|
{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 216.18002319335938, "std": 207.33937072753906, "min": -313.4776916503906, "p10": -8.50643005371093, "median": 186.04339599609375, "p90": 536.3402221679689, "max": 678.5515747070312, "pos_frac": 0.875, "sample": [110.36768341064453, -120.01377868652344, -41.6749267578125, 199.80935668945312, 54.864959716796875, 678.5515747070312, 369.9904479980469, 110.01014709472656, 266.8544921875, 156.73709106445312, 560.8892822265625, 212.46395874023438, -1.854400634765625, 145.373779296875, 339.4249572753906, 109.57511138916016, 14.506741523742676, 565.0516967773438, 266.7232360839844, 95.32353973388672, -221.71560668945312, -11.3572998046875, 238.47470092773438, -313.4776916503906, 184.48565673828125, 46.06678771972656, 172.845458984375, 149.81878662109375, 452.2941589355469, 281.64068603515625, 129.06353759765625, 327.12628173828125, 289.62969970703125, 316.99993896484375, 346.68994140625, 69.39057922363281, -65.9045181274414, 485.3316650390625, 240.7984161376953, 70.11836242675781, 627.8461303710938, 88.08226776123047, 341.46563720703125, 281.32952880859375, 403.22686767578125, 319.04547119140625, 285.3175048828125, 368.5347900390625, 487.4365234375, 630.0841064453125, 153.21487426757812, 85.554931640625, 120.4571533203125, 131.56153869628906, 7.8358306884765625, 186.9156951904297, 185.1710968017578, 134.4969940185547, 577.2595825195312, 289.44598388671875, 336.6759948730469, 557.2989501953125, -162.70628356933594, 118.67513275146484], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000373.npy"}
|
|
{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 184.58416748046875, "std": 251.88743591308594, "min": -428.25250244140625, "p10": -68.70586166381835, "median": 148.08279418945312, "p90": 478.3507781982422, "max": 949.069091796875, "pos_frac": 0.78125, "sample": [-116.09868621826172, 152.94662475585938, -56.19758605957031, 949.069091796875, 218.5698699951172, 903.0863037109375, 196.95895385742188, 520.472900390625, 272.99786376953125, 9.713462829589844, 94.050048828125, 227.93563842773438, 27.837188720703125, 140.42300415039062, 54.71348571777344, -22.69127655029297, 380.1796875, 294.3100891113281, 464.95648193359375, 253.60659790039062, -53.884613037109375, 119.1407470703125, 414.644775390625, 0.6098480224609375, -30.944774627685547, 567.19873046875, -1.057516098022461, 96.60298156738281, -169.99449157714844, 306.4559631347656, 239.90078735351562, 268.8597412109375, -87.33004760742188, 624.5318603515625, -263.35992431640625, 204.36134338378906, 35.10008239746094, 453.7861633300781, 159.653564453125, -27.99664306640625, 136.9116973876953, 107.38336944580078, 94.91211700439453, 71.28520202636719, 178.91761779785156, 143.21896362304688, -16.757949829101562, 455.7209167480469, 208.7897491455078, 292.5881652832031, 63.42362976074219, 468.1171875, -87.02018737792969, 374.0126953125, 246.962646484375, 4.722969055175781, -74.0665512084961, 237.23135375976562, 28.543228149414062, 86.08866882324219, 748.853759765625, -428.25250244140625, 165.94479370117188, 482.7366027832031], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000374.npy"}
|
|
{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 173.7774658203125, "std": 239.2744140625, "min": -489.3525390625, "p10": -109.86961135864256, "median": 170.8267364501953, "p90": 472.42402038574227, "max": 700.9549560546875, "pos_frac": 0.765625, "sample": [502.2880554199219, 334.3835144042969, 14.75897216796875, 243.79824829101562, -81.51725769042969, -142.39373779296875, 378.1220703125, 230.92013549804688, 111.26179504394531, -14.436511993408203, 34.74517059326172, 78.80857849121094, 145.941162109375, -489.3525390625, 52.44432830810547, 659.0789794921875, 283.2102966308594, 40.758750915527344, 318.06329345703125, 217.686767578125, 26.52215576171875, 278.0968017578125, -358.28179931640625, 99.48816680908203, 337.73541259765625, -62.16194152832031, 511.2447814941406, 65.5578384399414, -23.579315185546875, 129.38162231445312, -92.8865737915039, -14.491958618164062, 501.7505187988281, 505.44696044921875, -27.462413787841797, 246.53903198242188, 340.2917785644531, 432.0802001953125, 345.2158508300781, 481.1217956542969, 312.1539611816406, 171.59120178222656, 77.06224822998047, -275.4312744140625, 357.76300048828125, 371.12042236328125, 290.6123962402344, -204.55921936035156, 700.9549560546875, 450.97918701171875, 320.5140075683594, -171.2373046875, 1.844757080078125, 240.9466552734375, 94.95051574707031, 452.12921142578125, 170.06227111816406, 448.4243469238281, -62.31188201904297, 132.44732666015625, 258.817138671875, 321.409423828125, 138.48318481445312, -117.14805603027344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000375.npy"}
|
|
{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 215.9973907470703, "std": 240.43817138671875, "min": -423.17962646484375, "p10": -25.206413269042965, "median": 157.4661407470703, "p90": 554.9489501953126, "max": 683.0078125, "pos_frac": 0.875, "sample": [91.82125091552734, 576.9262084960938, 163.57162475585938, 68.11143493652344, 683.0078125, 107.25801086425781, 321.2330017089844, -183.70169067382812, 24.782821655273438, -27.025955200195312, 314.97802734375, 1.7406749725341797, 530.7308959960938, 94.2525634765625, -184.42807006835938, 349.16009521484375, 352.93280029296875, -67.39932250976562, -20.9608154296875, 86.88066101074219, 72.51313781738281, 42.914161682128906, -116.33770751953125, 624.9479370117188, 620.0802612304688, 89.66265106201172, 325.436767578125, 246.25254821777344, 478.860107421875, 0.5132865905761719, 563.8055419921875, 523.4685668945312, 90.46040344238281, 349.77740478515625, 222.34312438964844, 452.0555725097656, 472.3988342285156, 142.62637329101562, 353.5589599609375, 151.36065673828125, 106.94886779785156, 411.6195983886719, 10.943008422851562, 273.44683837890625, 447.6236267089844, 181.2365264892578, 674.48974609375, 44.410865783691406, -124.17430114746094, 69.1238784790039, 7.7250518798828125, 16.86324119567871, 583.0069580078125, 534.2835693359375, 228.67428588867188, 464.63665771484375, 112.97708129882812, 241.650634765625, 147.88821411132812, 193.050537109375, 37.91364288330078, 71.61103820800781, 520.4916381835938, -423.17962646484375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000376.npy"}
|
|
{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 204.14149475097656, "std": 221.7847137451172, "min": -279.9420166015625, "p10": -57.45343780517577, "median": 208.67232513427734, "p90": 507.6256713867188, "max": 672.5046997070312, "pos_frac": 0.75, "sample": [356.5177001953125, 672.5046997070312, 197.06588745117188, -2.0157470703125, 225.0085906982422, -64.84222412109375, 252.50579833984375, -110.4139404296875, 290.723876953125, 29.37763214111328, 496.30963134765625, 629.2623291015625, -33.22863006591797, 603.6539306640625, 11.880956649780273, 157.21954345703125, 148.09303283691406, 406.3470764160156, 220.95712280273438, -40.21293640136719, 183.98306274414062, 477.0491943359375, 253.55966186523438, -12.513172149658203, 420.6343688964844, 588.2705688476562, 286.8312072753906, -146.56504821777344, 598.459716796875, 132.14309692382812, 320.7842102050781, 213.3336181640625, 31.144004821777344, 126.54641723632812, 62.75533676147461, 374.56591796875, 122.91743469238281, 236.52017211914062, 269.7607421875, 615.0408935546875, 245.94764709472656, -31.705078125, -279.9420166015625, 296.3139953613281, -15.09783935546875, 133.14817810058594, 417.07818603515625, 204.0110321044922, -143.00572204589844, 277.6991882324219, -0.2805328369140625, 512.4754028320312, -36.829010009765625, -90.71844482421875, 75.91609954833984, 457.6179504394531, 172.5582275390625, 456.14886474609375, 346.57623291015625, -0.8263473510742188, 251.43202209472656, 123.676513671875, 224.23867797851562, -133.31341552734375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000377.npy"}
|
|
{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 231.7072296142578, "std": 214.9288330078125, "min": -339.3448181152344, "p10": -49.806359863281244, "median": 226.4088134765625, "p90": 493.5965026855469, "max": 828.5306396484375, "pos_frac": 0.84375, "sample": [538.2099609375, 344.7206726074219, 500.7086486816406, 587.5281372070312, 435.5445556640625, 105.23816680908203, 190.31524658203125, 281.63775634765625, 181.85369873046875, 248.25772094726562, 363.62115478515625, 294.2434387207031, 418.7729187011719, 179.3567657470703, 204.89691162109375, -17.718795776367188, 245.68023681640625, 554.5391845703125, 190.21435546875, 267.89013671875, 171.9192657470703, -52.74224090576172, -16.36794662475586, 105.67459869384766, 222.32177734375, 197.09945678710938, -71.53956604003906, 412.2921142578125, 122.88793182373047, 344.2895202636719, 423.0378112792969, 406.2391662597656, 222.1221923828125, 31.172422409057617, -42.955970764160156, 397.1147766113281, 202.78567504882812, 828.5306396484375, 23.67994499206543, -104.93692016601562, 232.9280242919922, 58.39575958251953, 221.6168975830078, 490.7331848144531, 54.4658203125, -56.62858581542969, 353.7945251464844, 348.3109436035156, 494.8236389160156, 319.4892272949219, 112.84843444824219, 273.49310302734375, 438.250244140625, 230.495849609375, -339.3448181152344, 195.11167907714844, 230.99417114257812, 262.9380798339844, -55.041160583496094, 665.5, 78.52421569824219, -275.39300537109375, 102.26111602783203, 452.5605163574219], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000378.npy"}
|
|
{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 219.2738800048828, "std": 240.3180389404297, "min": -274.9358215332031, "p10": -48.30901565551758, "median": 210.00704956054688, "p90": 495.47825622558594, "max": 977.2258911132812, "pos_frac": 0.8125, "sample": [211.71612548828125, 326.51763916015625, 84.82528686523438, 258.4111328125, -84.95552825927734, 50.403465270996094, 154.9227294921875, 605.187255859375, 98.64437866210938, 131.37747192382812, 313.7908630371094, 760.9769897460938, 77.07096862792969, 335.3808898925781, 353.19482421875, 243.14712524414062, -274.9358215332031, 701.8193969726562, 57.07649230957031, 219.57852172851562, 411.3302307128906, 436.2031555175781, 256.88043212890625, -4.006908416748047, 76.43380737304688, -19.580717086791992, -207.62582397460938, 149.7145233154297, 6.290672302246094, 290.35931396484375, 540.0582885742188, 381.3453369140625, -198.87384033203125, 218.45266723632812, 382.8434753417969, 492.326171875, 298.81146240234375, 350.27508544921875, 494.7311096191406, 91.22218322753906, -49.430442810058594, 495.7984619140625, 155.62567138671875, 0.8578987121582031, 380.552978515625, -45.692352294921875, 140.41790771484375, 415.38726806640625, 314.63934326171875, 67.48338317871094, -123.66970825195312, 256.5677185058594, 559.1820068359375, 140.7436065673828, 977.2258911132812, 208.2979736328125, 180.87115478515625, 377.1795349121094, 172.9557647705078, -29.998287200927734, 24.523056030273438, 468.2129211425781, -83.81982421875, -41.72438049316406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000379.npy"}
|
|
{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 232.85060119628906, "std": 246.2967071533203, "min": -559.47900390625, "p10": -54.98052978515622, "median": 241.77188873291016, "p90": 520.6568206787111, "max": 811.065185546875, "pos_frac": 0.84375, "sample": [124.78326416015625, 150.7772979736328, -23.706710815429688, 811.065185546875, 218.1370849609375, 94.20940399169922, 148.4530487060547, 392.18035888671875, 359.63134765625, 252.45985412597656, 81.37737274169922, 434.71063232421875, -29.802330017089844, 547.8810424804688, 661.1244506835938, 448.6759338378906, 268.10028076171875, 334.7476806640625, 427.2113037109375, 110.48114013671875, 277.3597717285156, -251.92938232421875, 207.24822998046875, 128.21466064453125, 65.34066772460938, -77.71996307373047, 454.59619140625, 258.6899719238281, 184.72244262695312, 545.495361328125, 332.71575927734375, 462.7002258300781, 425.55633544921875, 450.2147216796875, 375.8171691894531, 370.09039306640625, 88.32075500488281, 15.468048095703125, 231.08392333984375, 69.92992401123047, 668.607666015625, -3.617229461669922, -188.96360778808594, 142.627685546875, -65.77118682861328, 212.78663635253906, 445.5823974609375, 413.1809997558594, 46.47492218017578, -239.27919006347656, 366.960205078125, 53.52337646484375, 590.8046264648438, 258.911865234375, 223.4058837890625, -92.66846466064453, 415.80450439453125, 314.0926513671875, 136.77294921875, 34.7286376953125, -559.47900390625, 309.060791015625, 672.5533447265625, 319.924560546875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000380.npy"}
|
|
{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 178.73831176757812, "std": 262.8310546875, "min": -467.479736328125, "p10": -118.89214935302734, "median": 168.44281768798828, "p90": 520.9843566894531, "max": 751.31884765625, "pos_frac": 0.734375, "sample": [196.3746337890625, 9.067230224609375, 1.8653278350830078, 521.6536254882812, 453.0739440917969, 556.6369018554688, 431.51019287109375, 4.5578460693359375, 101.85535430908203, 506.4342346191406, 398.30718994140625, 113.20628356933594, 376.669921875, 256.723388671875, 751.31884765625, -133.243408203125, 388.1942138671875, 389.18853759765625, 571.0089721679688, 305.9812316894531, 733.1188354492188, 604.5322265625, -203.598388671875, -122.45893859863281, -96.06661224365234, 159.34979248046875, 519.4227294921875, -34.49931716918945, -242.54815673828125, -2.4692745208740234, 102.07853698730469, 204.33489990234375, -51.25077819824219, -49.83675765991211, 170.29263305664062, -80.12174987792969, 220.88487243652344, 152.44195556640625, 221.13986206054688, -0.588775634765625, 9.555656433105469, 90.6409912109375, 718.2634887695312, 234.91415405273438, 310.26776123046875, 297.2169494628906, 58.89752197265625, -110.56964111328125, 458.2669677734375, -50.30803298950195, 66.63762664794922, 390.9859924316406, -25.125640869140625, 186.8529510498047, 71.16878509521484, -196.16824340820312, 127.77444458007812, 327.9046630859375, 341.0592956542969, 229.31930541992188, 166.59300231933594, -467.479736328125, -399.2643127441406, 197.30596923828125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000381.npy"}
|
|
{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 203.7138671875, "std": 202.09686279296875, "min": -216.81800842285156, "p10": -68.37089843749997, "median": 156.13967895507812, "p90": 524.2022094726562, "max": 748.1982421875, "pos_frac": 0.84375, "sample": [748.1982421875, 526.185791015625, 141.44436645507812, 111.1242904663086, 133.22364807128906, 574.200439453125, -101.66204071044922, 66.0616226196289, 227.444580078125, -43.82025909423828, 128.8439483642578, 231.7520294189453, 367.50579833984375, 289.4730224609375, 346.59442138671875, 303.9661865234375, 150.94700622558594, -21.290756225585938, 383.1032409667969, 99.49220275878906, 369.81292724609375, -82.86009216308594, 68.13203430175781, 268.8554992675781, 519.5738525390625, -78.8926010131836, 322.982177734375, -95.42410278320312, 411.3296203613281, 216.79132080078125, 567.2841796875, -79.5904769897461, -6.205715179443359, 287.4130554199219, 274.4122619628906, -216.81800842285156, -84.67465209960938, 324.66937255859375, 2.5264739990234375, 277.540283203125, 103.7176742553711, 181.93331909179688, 320.0517578125, 53.339317321777344, 161.3323516845703, 108.31803894042969, 255.72853088378906, 239.05068969726562, 150.7003936767578, 149.6997833251953, 607.85888671875, 534.2455444335938, 278.4898681640625, 56.366966247558594, 149.90582275390625, 543.6189575195312, 59.80656051635742, 29.652767181396484, 362.01556396484375, 416.0307312011719, 140.3528594970703, 92.66039276123047, 18.181747436523438, 94.98275756835938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000382.npy"}
|
|
{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 206.43734741210938, "std": 270.03265380859375, "min": -638.0016479492188, "p10": -96.03634033203123, "median": 209.94802856445312, "p90": 564.7625244140626, "max": 825.189208984375, "pos_frac": 0.78125, "sample": [39.620033264160156, -66.06417846679688, 90.57546997070312, -220.85556030273438, 458.4476318359375, -103.95219421386719, -102.09526824951172, 554.5771484375, 305.0192565917969, -29.912487030029297, 203.17465209960938, 251.81634521484375, 72.2342529296875, -56.91603088378906, -239.3157501220703, 216.72140502929688, 810.922607421875, 47.55976104736328, 254.0703125, 241.1158905029297, 647.1585693359375, -196.46920776367188, 50.110721588134766, 546.3247680664062, 200.60888671875, -81.89884185791016, 615.80224609375, 127.64646911621094, -56.07818603515625, 223.8228302001953, 825.189208984375, 183.28549194335938, 522.7803955078125, 224.15765380859375, 365.27392578125, 242.4590606689453, 427.019775390625, 86.06895446777344, 242.47361755371094, 281.39837646484375, -5.561004638671875, 19.9083251953125, 189.23204040527344, 104.25273132324219, 265.9918518066406, 605.6181030273438, -67.31097412109375, 470.77886962890625, 28.542922973632812, 284.55950927734375, 622.2390747070312, 126.58094024658203, -111.74995422363281, 240.745361328125, 388.52838134765625, 48.36878204345703, 569.127685546875, 483.21527099609375, 132.27520751953125, 468.44537353515625, -638.0016479492188, 62.96444320678711, 322.9200439453125, 396.4401550292969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000383.npy"}
|
|
{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 173.94912719726562, "std": 184.17721557617188, "min": -241.58746337890625, "p10": -61.82555847167967, "median": 155.5501708984375, "p90": 403.2276641845703, "max": 689.1553344726562, "pos_frac": 0.84375, "sample": [167.07647705078125, 56.033721923828125, 644.2196655273438, -70.01744079589844, 8.856735229492188, 73.44017028808594, -167.6257781982422, -14.152938842773438, 689.1553344726562, 132.75442504882812, 458.2162170410156, 319.48565673828125, 173.21067810058594, -123.49388885498047, 259.2810363769531, 96.14532470703125, -42.71116638183594, 44.63875961303711, 284.50140380859375, 362.5137939453125, 112.18405151367188, 146.71066284179688, 115.38484191894531, 265.93609619140625, 223.9044189453125, 107.96638488769531, 66.23125457763672, -105.29447174072266, -241.58746337890625, 114.19794464111328, 253.8892822265625, 131.78709411621094, 36.73523712158203, 227.15975952148438, 180.26617431640625, -140.7086181640625, 397.70159912109375, 242.02853393554688, 278.66552734375, 40.361053466796875, 71.50425720214844, 215.29891967773438, 290.8960266113281, 404.171142578125, 197.75091552734375, 260.2332763671875, 434.27496337890625, 190.01516723632812, 164.38967895507812, 71.92583465576172, 286.91796875, -73.4867172241211, 131.28240966796875, 554.1251220703125, 143.23403930664062, -39.56855773925781, 70.0533447265625, 401.0262145996094, 263.2231750488281, 134.8448944091797, 414.0238342285156, 314.6151428222656, 117.70840454101562, 309.23687744140625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000384.npy"}
|
|
{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 205.85772705078125, "std": 194.9279327392578, "min": -237.67959594726562, "p10": -5.311087036132812, "median": 193.90961456298828, "p90": 502.10599060058604, "max": 586.6265258789062, "pos_frac": 0.875, "sample": [50.75433349609375, -5.375114440917969, -5.161689758300781, 251.74929809570312, 405.2138671875, 235.01577758789062, 416.57293701171875, 125.12139892578125, -119.1777114868164, 395.2589111328125, 188.7141876220703, 41.85472106933594, 78.0281753540039, 108.76081848144531, 465.963134765625, 16.368736267089844, 566.5564575195312, 299.11468505859375, 132.68014526367188, 121.3060073852539, 306.9776611328125, 176.76504516601562, -121.92921447753906, 314.1435241699219, -153.5810546875, 219.78762817382812, 116.60081481933594, 543.72314453125, 72.76152038574219, 65.74978637695312, 316.62042236328125, 188.47300720214844, 120.76681518554688, 586.6265258789062, 561.333251953125, 82.41816711425781, 199.10504150390625, -5.7941131591796875, 520.30029296875, 284.80718994140625, 13.408432006835938, 100.26289367675781, 364.1315002441406, 205.66062927246094, 220.41912841796875, 514.3131103515625, 341.1737365722656, 45.750701904296875, 473.6227111816406, 261.2056884765625, -237.67959594726562, 71.36244201660156, 148.85305786132812, -226.65469360351562, 220.79591369628906, 230.41275024414062, 417.99041748046875, 140.378173828125, 246.57859802246094, 571.9376220703125, 150.61878967285156, 308.26519775390625, 149.07785034179688, 278.0355529785156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000385.npy"}
|
|
{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 216.52902221679688, "std": 239.32798767089844, "min": -302.24530029296875, "p10": -49.298160552978516, "median": 194.41921997070312, "p90": 554.4645324707031, "max": 798.0344848632812, "pos_frac": 0.828125, "sample": [-47.75331497192383, 283.8639831542969, -49.76854705810547, 375.3880920410156, 570.2996826171875, 195.40382385253906, 108.0641860961914, 394.0133972167969, 161.71658325195312, 402.77734375, 328.20269775390625, 759.01318359375, -208.99229431152344, -21.071870803833008, 562.3321533203125, 277.57269287109375, 288.68377685546875, 391.9272766113281, 16.75871467590332, 70.83740997314453, 110.52684783935547, 235.09837341308594, 162.6246337890625, 158.62002563476562, 200.8026123046875, 274.0644226074219, 135.03579711914062, -13.27358627319336, 798.0344848632812, 536.1067504882812, -48.200592041015625, 104.45904541015625, 123.84487915039062, -130.55291748046875, 203.50950622558594, 631.8931884765625, 745.499755859375, 345.47003173828125, 638.282958984375, 110.0873031616211, 195.76791381835938, 457.90185546875, -185.41175842285156, 132.8645782470703, 36.54620361328125, 183.16586303710938, 136.50648498535156, 193.4346160888672, -118.25666809082031, 299.0670471191406, -302.24530029296875, 467.4212646484375, 58.937416076660156, 81.77141571044922, 47.754974365234375, 115.4179458618164, 439.05535888671875, 343.0651550292969, -246.9429168701172, 291.1383972167969, 326.537353515625, 247.4468231201172, 365.5418701171875, 110.1698226928711], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000386.npy"}
|
|
{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 177.15090942382812, "std": 197.06118774414062, "min": -234.31903076171875, "p10": -40.22748222351074, "median": 168.4553680419922, "p90": 414.1745727539064, "max": 770.1693725585938, "pos_frac": 0.78125, "sample": [263.8907165527344, -27.20476531982422, 137.1886444091797, 137.8000030517578, 111.9049072265625, 770.1693725585938, 100.56344604492188, 80.92237854003906, 326.87225341796875, 468.6423645019531, -194.47149658203125, 286.93951416015625, 109.52349853515625, 151.85060119628906, -234.31903076171875, 241.55654907226562, 163.1826171875, 389.4307861328125, -39.63800811767578, 72.38784790039062, 596.7713012695312, 92.43612670898438, 173.72811889648438, 657.216552734375, 326.0547180175781, 178.05392456054688, 104.13883209228516, 186.63522338867188, 68.3704605102539, 116.31622314453125, 48.13715362548828, 185.48902893066406, 296.4918518066406, 86.85133361816406, 429.3548278808594, 177.77603149414062, 306.75347900390625, 77.10563659667969, 265.3735656738281, 316.7460632324219, 98.6378402709961, -40.4801139831543, 543.8442993164062, 338.01129150390625, 290.2406311035156, 105.78680419921875, 294.3340759277344, -55.8956184387207, 424.779052734375, -79.23716735839844, 308.7503662109375, -14.922760009765625, -4.354896545410156, 267.7835388183594, -172.52078247070312, 181.01956176757812, -5.1181488037109375, 291.18310546875, -5.161346435546875, -10.305130004882812, 226.57545471191406, -180.3563690185547, 217.7520751953125, 310.31964111328125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000387.npy"}
|
|
{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 239.53643798828125, "std": 273.58392333984375, "min": -459.57281494140625, "p10": -0.7169088363647349, "median": 208.55492401123047, "p90": 568.7348144531253, "max": 1262.5296630859375, "pos_frac": 0.890625, "sample": [1262.5296630859375, 325.8957824707031, -52.9841194152832, 504.64764404296875, 470.9176330566406, 735.7045288085938, 188.30397033691406, -225.77801513671875, 103.67345428466797, 275.44964599609375, 219.42483520507812, 749.2763671875, 462.4022216796875, 228.248779296875, -459.57281494140625, 45.416725158691406, 191.896240234375, 237.29083251953125, 399.1585693359375, 28.874427795410156, 123.2264633178711, 627.0223999023438, 123.85108184814453, -40.718360900878906, 153.0026092529297, 254.19021606445312, 43.903709411621094, 17.177352905273438, 40.510955810546875, 224.1832275390625, 29.91838836669922, 140.6415557861328, 250.83937072753906, 19.871917724609375, 354.46893310546875, 188.77880859375, 108.93022155761719, 96.61576080322266, 427.8499755859375, 106.5938491821289, 263.8912658691406, 129.1937713623047, 329.239013671875, 126.16434478759766, 159.17657470703125, 90.78699493408203, 356.0599365234375, 276.1278076171875, -191.36300659179688, 349.0952453613281, 311.5781555175781, 946.3473510742188, -146.7013397216797, 344.3310546875, 197.6850128173828, 22.34124755859375, -5.406646728515625, 357.3949279785156, 596.2007446289062, 674.595458984375, 502.45208740234375, 421.0743713378906, 228.2075958251953, 10.225812911987305], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000388.npy"}
|
|
{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 206.39407348632812, "std": 244.87132263183594, "min": -266.3153991699219, "p10": -115.8690391540527, "median": 209.9978790283203, "p90": 498.76277160644537, "max": 779.5155029296875, "pos_frac": 0.796875, "sample": [64.66763305664062, 502.91839599609375, 388.8075256347656, -245.7634735107422, 79.01242065429688, 279.876220703125, 196.2613067626953, 455.37847900390625, 291.6236572265625, 203.6666259765625, 228.46652221679688, -74.1158218383789, 216.32913208007812, -193.41903686523438, 142.56944274902344, 334.2230224609375, 152.00863647460938, 396.502685546875, 388.9767761230469, -56.433502197265625, -266.3153991699219, 14.409305572509766, 531.7657470703125, 18.106990814208984, 244.1220703125, 489.0663146972656, -193.30181884765625, 43.51689147949219, 140.93728637695312, 154.9472198486328, 310.93316650390625, 52.954132080078125, 137.6986846923828, 325.7848205566406, -228.44708251953125, 274.29107666015625, 178.51939392089844, 243.37493896484375, 321.433837890625, 768.57470703125, 384.5499267578125, -42.33204650878906, 342.36676025390625, 197.4744415283203, 292.04327392578125, 359.49969482421875, 53.68994140625, 134.6897735595703, 433.18804931640625, 94.26746368408203, -11.729209899902344, 693.49169921875, -11.265121459960938, -154.15383911132812, 252.4674530029297, 718.63720703125, 7.5657958984375, 779.5155029296875, 408.4773864746094, -10.990795135498047, 225.09390258789062, -133.76327514648438, 217.16807556152344, 665.3405151367188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000389.npy"}
|
|
{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 204.24429321289062, "std": 287.8711853027344, "min": -450.361328125, "p10": -112.69650268554686, "median": 233.84265899658203, "p90": 462.81529846191404, "max": 1551.5533447265625, "pos_frac": 0.796875, "sample": [247.35411071777344, 249.2731170654297, 562.73193359375, 430.052978515625, -450.361328125, -103.47113800048828, -125.74979400634766, 145.99009704589844, 314.5704345703125, 308.5056457519531, 428.5046691894531, 37.53437805175781, 250.42825317382812, 333.42926025390625, 152.30572509765625, 68.35946655273438, 3.4124221801757812, 1551.5533447265625, -116.65023040771484, 414.52825927734375, 131.9628143310547, 371.1455383300781, 463.2484436035156, 113.41018676757812, 122.58998107910156, -84.04522705078125, 152.965576171875, 0.66552734375, 304.7025451660156, 242.55419921875, 118.1297607421875, -2.59033203125, -249.24331665039062, -40.528846740722656, 157.52232360839844, 469.38079833984375, 721.724609375, 485.68841552734375, -368.9352722167969, 305.8321533203125, 461.80462646484375, 474.12066650390625, 296.9352722167969, 168.67196655273438, 434.337158203125, 165.74195861816406, 171.107421875, 68.81852722167969, 421.23321533203125, 387.4133605957031, 342.4378967285156, 187.99998474121094, 247.36703491210938, 251.69110107421875, 316.87249755859375, -40.621360778808594, -2.884429931640625, -225.9625701904297, -388.4798889160156, 225.13111877441406, 327.64202880859375, 113.09353637695312, 263.1645812988281, 285.5182800292969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000390.npy"}
|
|
{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 136.22325134277344, "std": 215.4071044921875, "min": -333.5400390625, "p10": -87.74283752441406, "median": 93.58790588378906, "p90": 393.82459716796876, "max": 826.8522338867188, "pos_frac": 0.71875, "sample": [-32.17494583129883, 11.165718078613281, 35.887367248535156, -41.53587341308594, 99.2037582397461, 264.82269287109375, 32.62718963623047, 87.97205352783203, 41.21852493286133, 266.3194274902344, -109.6236801147461, 160.23622131347656, -181.52972412109375, 16.29431915283203, 79.93278503417969, 62.83831024169922, 255.17527770996094, 130.10382080078125, -4.605779647827148, -78.91680908203125, 159.98130798339844, 397.68731689453125, 350.26324462890625, 505.6387939453125, -36.644203186035156, 136.95216369628906, 826.8522338867188, -58.54554748535156, -33.621978759765625, 220.50103759765625, 271.0428161621094, -18.843467712402344, 78.27421569824219, 45.425296783447266, -94.62703704833984, 244.9434356689453, 143.11166381835938, 19.552040100097656, 540.7220458984375, 151.26885986328125, -46.47901153564453, -59.173912048339844, 158.79244995117188, 308.7739562988281, 591.2186279296875, -91.52542114257812, 490.30322265625, -21.726919174194336, -214.6925506591797, 14.547630310058594, 46.85731506347656, 143.990234375, -158.77890014648438, 350.443115234375, 384.81158447265625, 70.08210754394531, 659.024658203125, 318.63232421875, 273.30877685546875, -333.5400390625, 214.5084228515625, 234.2022705078125, 247.6019287109375, 191.76171875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000391.npy"}
|
|
{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 229.41700744628906, "std": 215.0714569091797, "min": -196.5216522216797, "p10": 2.3549690246582085, "median": 188.1477508544922, "p90": 474.91748046875006, "max": 922.649169921875, "pos_frac": 0.90625, "sample": [106.24073028564453, 172.39370727539062, -108.13723754882812, 75.38772583007812, 280.83343505859375, -8.744178771972656, 491.70263671875, 198.15467834472656, 317.11541748046875, 309.9779052734375, 435.7030029296875, -196.5216522216797, 256.6971435546875, 127.86184692382812, 120.95079040527344, 35.022247314453125, 480.81256103515625, 583.4212036132812, 371.26666259765625, 156.15252685546875, -11.941215515136719, 280.02874755859375, 407.19525146484375, 73.70201110839844, 337.7991943359375, 314.43463134765625, 222.1021728515625, 922.649169921875, 19.992828369140625, 225.1363525390625, 178.1408233642578, 91.8629150390625, 361.8011474609375, 160.06863403320312, 409.99371337890625, 160.5369415283203, 279.21343994140625, 166.32888793945312, 163.71437072753906, 102.45307922363281, 312.869384765625, 347.3564147949219, 48.92411804199219, 7.436286926269531, 461.16229248046875, 709.3646240234375, 288.1082763671875, 412.77313232421875, 343.73492431640625, -161.70921325683594, 90.05040740966797, 132.13389587402344, 68.34310913085938, 256.1959228515625, 134.6894989013672, 147.2629852294922, 282.29266357421875, 20.263652801513672, 793.403076171875, 0.1772613525390625, -104.0145492553711, 64.11628723144531, 314.52081298828125, 641.7288818359375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000392.npy"}
|
|
{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 185.52931213378906, "std": 277.7700500488281, "min": -240.36178588867188, "p10": -125.24455795288085, "median": 198.52427673339844, "p90": 460.63665161132815, "max": 1279.1119384765625, "pos_frac": 0.71875, "sample": [222.13722229003906, 517.5194702148438, 37.98078536987305, -82.94293975830078, 266.23895263671875, 394.5962219238281, -117.94974517822266, 356.05902099609375, 244.79566955566406, 218.46316528320312, 83.2137451171875, 212.30072021484375, 162.992431640625, 335.5484924316406, 173.08384704589844, 64.70829010009766, 829.26513671875, 888.2669067382812, 443.4165344238281, -5.037933349609375, 141.985107421875, 312.62652587890625, -240.36178588867188, -217.45660400390625, -46.53770446777344, -27.292400360107422, -183.9479217529297, 206.27313232421875, 309.1399230957031, 256.3487548828125, 190.77542114257812, 381.92388916015625, 452.6143798828125, 229.7489013671875, 447.75311279296875, 228.1765899658203, 223.18978881835938, 97.64456939697266, 338.5842590332031, -33.12110900878906, 7.4790191650390625, 139.3614959716797, 510.78839111328125, -104.98603057861328, -125.980712890625, 464.07476806640625, 317.9652404785156, -163.6552734375, 46.66828155517578, 116.19347381591797, 104.52686309814453, 211.3772735595703, -123.52686309814453, -103.25643157958984, -172.703857421875, 342.1460876464844, 328.0883483886719, -226.73727416992188, 224.78170776367188, 40.570457458496094, -123.45537567138672, 1279.1119384765625, 594.234619140625, -21.912628173828125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000393.npy"}
|
|
{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 199.66749572753906, "std": 250.98651123046875, "min": -234.57363891601562, "p10": -87.16026229858397, "median": 151.56459045410156, "p90": 523.4576232910157, "max": 885.9522705078125, "pos_frac": 0.765625, "sample": [126.73799133300781, 344.89544677734375, -188.27040100097656, 347.4533386230469, 207.64300537109375, -45.9342041015625, 149.0083770751953, 128.48524475097656, 473.73699951171875, 62.572174072265625, 498.1498718261719, 25.016014099121094, -234.57363891601562, 50.23326110839844, -36.7491455078125, -169.14144897460938, -117.41251373291016, 640.9922485351562, 526.224365234375, -190.0862274169922, 417.5927734375, -188.3056640625, 285.96380615234375, 283.4745788574219, 154.1208038330078, 283.2108154296875, 285.3690185546875, 44.43064880371094, -92.63268280029297, 517.0018920898438, 488.736328125, 604.0253295898438, -74.39128112792969, 347.4472351074219, 183.83746337890625, 43.32585144042969, 126.44481658935547, -12.837379455566406, 89.431396484375, 591.27099609375, 17.13318634033203, 492.6425476074219, -30.254070281982422, 164.07786560058594, 106.29994201660156, -54.248046875, 445.3104553222656, 697.22998046875, 217.66127014160156, 126.64657592773438, 411.1746826171875, 245.4199676513672, 885.9522705078125, 707.4647827148438, 301.81756591796875, 41.390769958496094, 21.018234252929688, -12.831249237060547, 62.804786682128906, 390.39996337890625, 219.94723510742188, -28.094316482543945, 100.4952621459961, 272.7620544433594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000394.npy"}
|
|
{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 211.40679931640625, "std": 275.044921875, "min": -452.6563720703125, "p10": -51.792471694946286, "median": 186.3069610595703, "p90": 575.8296630859377, "max": 890.506103515625, "pos_frac": 0.734375, "sample": [388.4875793457031, -152.42459106445312, 139.4378662109375, 38.90631103515625, 119.16822814941406, 323.3236083984375, 296.17279052734375, 243.7205810546875, -204.96466064453125, 219.78990173339844, -3.4387359619140625, 376.80682373046875, 473.6482238769531, -48.682090759277344, 123.44271087646484, 4.062694549560547, 526.611083984375, 262.76837158203125, 723.0316162109375, -48.4976806640625, 514.5404052734375, -46.80760955810547, 483.7058410644531, -47.34318542480469, 192.34878540039062, -26.177276611328125, 262.685546875, 392.5085144042969, -12.982200622558594, 28.221397399902344, 767.464111328125, 890.506103515625, 281.71044921875, 94.17138671875, 276.330078125, 12.7191162109375, 140.052978515625, 161.0059356689453, 670.20458984375, 241.654541015625, 411.73828125, 432.8760681152344, -452.6563720703125, -82.92396545410156, -214.25759887695312, 299.6846923828125, -35.86875915527344, 697.2650146484375, -9.726186752319336, -34.14643859863281, 206.56761169433594, -185.66397094726562, 51.48345184326172, 868.8734130859375, 596.92333984375, 248.79930114746094, 180.26513671875, 421.0430908203125, 133.6802215576172, 392.43865966796875, 434.2515563964844, 51.53571319580078, 93.08756256103516, -53.125492095947266], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000395.npy"}
|
|
{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 195.95245361328125, "std": 211.47569274902344, "min": -292.5875244140625, "p10": -56.26165733337399, "median": 178.9070053100586, "p90": 450.36929931640634, "max": 873.40283203125, "pos_frac": 0.875, "sample": [243.8358612060547, 370.65478515625, 41.976036071777344, 176.82357788085938, 177.5380859375, 465.3062744140625, 119.61363983154297, 114.43649291992188, 183.48065185546875, 165.05828857421875, 294.01947021484375, 267.5667724609375, 183.42178344726562, 405.6538391113281, -112.20626831054688, 180.2759246826172, 195.98724365234375, 93.43037414550781, 71.60519409179688, 183.483642578125, 148.13671875, 348.31085205078125, 827.455810546875, 282.7068786621094, 196.39535522460938, 423.2236633300781, 334.5557861328125, 147.9361572265625, 88.34046173095703, 500.29669189453125, -69.95216369628906, 359.23150634765625, -77.1370620727539, 235.06723022460938, 60.85618209838867, 141.23435974121094, 252.0574951171875, -81.0708999633789, 333.25390625, 561.927978515625, 243.55300903320312, 13.54962158203125, -110.98895263671875, 462.0031433105469, 41.91943359375, -24.317142486572266, 353.73333740234375, 19.84546661376953, 35.08343505859375, 195.67327880859375, 684.4508666992188, 66.5950698852539, 61.4530029296875, -98.98358154296875, 66.76664733886719, 210.25169372558594, 333.2320251464844, 124.9903793334961, 9.475013732910156, -292.5875244140625, 108.78959655761719, 873.40283203125, 210.235595703125, 118.04263305664062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000396.npy"}
|
|
{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 259.2659912109375, "std": 229.28956604003906, "min": -268.17974853515625, "p10": -2.731680297851552, "median": 222.11256408691406, "p90": 512.6396850585937, "max": 874.3836059570312, "pos_frac": 0.890625, "sample": [13.098453521728516, 433.14593505859375, 37.507781982421875, 313.7345275878906, 138.26260375976562, 139.56777954101562, 166.2413330078125, 234.7635498046875, 440.13043212890625, 192.73558044433594, 451.2042541503906, 95.20359802246094, 470.41180419921875, -27.47149658203125, 145.14822387695312, 461.9681701660156, 260.8529968261719, -69.66452026367188, 402.41094970703125, 525.2532348632812, 286.6343688964844, 498.779296875, 275.16607666015625, 359.5302429199219, 822.092041015625, 77.42213439941406, 575.3028564453125, 468.75616455078125, -115.23593139648438, 162.2006378173828, 458.2685546875, 189.9696807861328, 127.39321899414062, 700.068115234375, 436.5237731933594, 187.52664184570312, -7.096111297607422, 265.71502685546875, 180.03738403320312, 170.09378051757812, -268.17974853515625, -110.5995101928711, 62.73796844482422, -147.12960815429688, 513.432861328125, 121.17945861816406, 79.50061798095703, 174.8975830078125, 7.451992034912109, 496.39501953125, 39.50165939331055, 510.7889404296875, 300.2466735839844, 874.3836059570312, 196.53225708007812, 696.869873046875, 136.40086364746094, 187.43594360351562, 434.8466491699219, 209.46157836914062, 246.11773681640625, 293.3670349121094, 353.4022216796875, 240.3286590576172], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000397.npy"}
|
|
{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 213.44400024414062, "std": 248.39999389648438, "min": -250.2772674560547, "p10": -35.28849563598632, "median": 152.6462173461914, "p90": 497.9842742919922, "max": 902.0596923828125, "pos_frac": 0.828125, "sample": [189.67398071289062, 69.11058044433594, 377.2416076660156, 561.0054931640625, -80.68663024902344, 50.583168029785156, 256.04840087890625, -156.53863525390625, 62.56535339355469, 19.46857452392578, 487.43896484375, -250.2772674560547, 424.68548583984375, 353.73980712890625, 365.7124938964844, 855.7889404296875, 163.6999969482422, 117.92882537841797, 502.5036926269531, 280.11004638671875, 416.67718505859375, 108.77349853515625, 539.615966796875, 312.2942199707031, 870.3436279296875, 9.666702270507812, 90.18487548828125, 137.7500457763672, 437.83306884765625, 262.5575866699219, 41.3917236328125, 174.84234619140625, -89.4404296875, -3.6538848876953125, 179.92709350585938, 89.22398376464844, 5.127037048339844, 299.87115478515625, 117.51850128173828, 199.17332458496094, 72.44374084472656, 403.2769470214844, 94.45366668701172, 47.75421905517578, 126.04940795898438, -38.366615295410156, 341.80120849609375, 5.946197509765625, -13.968215942382812, 141.59243774414062, 178.44789123535156, 902.0596923828125, 443.8920593261719, 688.5394287109375, -132.61004638671875, 405.7001647949219, 339.20697021484375, 418.4758605957031, 474.5074462890625, 81.5743408203125, -28.106216430664062, 12.809661865234375, -9.832313537597656, -144.71307373046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000398.npy"}
|
|
{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 208.820556640625, "std": 255.5824432373047, "min": -259.30291748046875, "p10": -120.82381896972656, "median": 181.79762268066406, "p90": 516.3547760009766, "max": 874.681884765625, "pos_frac": 0.75, "sample": [184.67843627929688, 179.40794372558594, 77.23958587646484, -119.45498657226562, 506.0904235839844, -70.17745208740234, 560.71044921875, 41.13129806518555, 175.287353515625, 34.73345947265625, 166.635498046875, -154.89883422851562, 520.7537841796875, -17.38885498046875, 503.8780212402344, 147.3884735107422, 337.2513427734375, -33.118247985839844, -77.41968536376953, 74.01239013671875, 467.6703186035156, 83.42311096191406, 326.31512451171875, 184.1873016357422, 453.16864013671875, 662.9945068359375, 130.81228637695312, 398.02679443359375, -224.61634826660156, 186.72805786132812, 35.45350646972656, 408.00946044921875, 217.25885009765625, 147.05699157714844, 386.8144836425781, -161.66616821289062, 874.681884765625, 438.39703369140625, 610.087158203125, -152.7188262939453, 415.21051025390625, 338.8182067871094, 159.9433135986328, -259.30291748046875, -50.75912094116211, 392.9072265625, -54.129974365234375, 613.9127197265625, 95.8051986694336, -121.41046142578125, 378.5773010253906, 735.1336669921875, 391.037841796875, 186.3154754638672, 492.9572448730469, 193.78445434570312, 165.56265258789062, 261.4039611816406, -189.90689086914062, 257.8810729980469, 432.6456604003906, -60.41911315917969, 159.69992065429688, -79.97624969482422], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000399.npy"}
|
|
{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 295.59234619140625, "std": 259.3102111816406, "min": -372.24322509765625, "p10": -23.480941772460938, "median": 292.39024353027344, "p90": 593.3494812011719, "max": 994.4835205078125, "pos_frac": 0.875, "sample": [194.40371704101562, 412.18719482421875, 341.00128173828125, 219.459228515625, 590.281982421875, 24.686492919921875, 458.8919677734375, 224.94940185546875, -40.23021697998047, 485.0838623046875, 195.21542358398438, -27.00762367248535, 347.4168701171875, 262.54559326171875, -372.24322509765625, 547.3272094726562, 635.1361694335938, 90.066650390625, 214.31837463378906, 339.22711181640625, 750.420654296875, 489.59771728515625, 229.57180786132812, 465.23193359375, 51.458946228027344, 222.37977600097656, 101.62638854980469, 548.6318359375, 302.9742431640625, 388.53338623046875, 342.0240783691406, 119.81695556640625, -70.0467758178711, 321.1822509765625, 74.88611602783203, -177.16429138183594, 12.163261413574219, 642.3938598632812, 294.4404602050781, 445.7021484375, -23.044937133789062, 195.5411376953125, 266.9582214355469, 873.1527099609375, 17.612672805786133, 515.8736572265625, 228.0653076171875, 163.63238525390625, 42.4997444152832, 994.4835205078125, 594.6641235351562, 2.405853271484375, 379.2205810546875, 406.71990966796875, 290.34002685546875, 567.165771484375, -151.0093536376953, 607.7479248046875, 550.5272216796875, 366.40887451171875, 241.90985107421875, 559.2781982421875, 552.883544921875, -23.667800903320312], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000400.npy"}
|
|
{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 173.17677307128906, "std": 303.46435546875, "min": -644.5868530273438, "p10": -121.02416915893555, "median": 149.57107543945312, "p90": 619.2622314453125, "max": 886.2802734375, "pos_frac": 0.6875, "sample": [173.66246032714844, 468.9979248046875, 26.784107208251953, 201.06515502929688, 382.6544189453125, -42.79901885986328, -214.73287963867188, -344.8495178222656, -644.5868530273438, -85.54090881347656, 85.46163177490234, 155.98538208007812, -133.8780517578125, 62.38127899169922, -9.9534912109375, -8.647466659545898, 370.4952392578125, 240.51144409179688, 142.73348999023438, 71.45594787597656, -77.10739135742188, -89.02055358886719, 62.767417907714844, 719.563720703125, -18.636011123657227, 593.0115966796875, 737.1154174804688, 375.002197265625, 621.0367431640625, 219.42677307128906, 115.19705963134766, 258.471435546875, -68.47848510742188, 79.35587310791016, -70.78345489501953, -373.4967041015625, 804.4016723632812, -11.988292694091797, 251.08555603027344, 335.67578125, 178.17449951171875, 495.1134948730469, 280.9508972167969, -48.36404037475586, 238.8425750732422, 145.43545532226562, 191.3770294189453, -79.77848815917969, 886.2802734375, -283.31439208984375, 577.8797607421875, 343.3408508300781, 252.84767150878906, 59.318443298339844, 615.1217041015625, 794.203857421875, 88.82157135009766, 209.44290161132812, 153.70669555664062, 1.7994194030761719, 211.0438232421875, 652.1154174804688, -121.9256820678711, -118.92063903808594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000401.npy"}
|
|
{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 297.65618896484375, "std": 236.0499725341797, "min": -77.60086059570312, "p10": 18.90421295166017, "median": 245.93435668945312, "p90": 646.9226623535156, "max": 1021.55615234375, "pos_frac": 0.9375, "sample": [374.3564453125, 32.24737548828125, 4.656455993652344, 449.58203125, 242.53958129882812, 323.7192077636719, 8.002090454101562, 249.32913208007812, 56.610877990722656, 458.2841796875, 296.10076904296875, 491.9545593261719, 591.9160766601562, 129.0937042236328, -50.41748046875, 540.7974243164062, 13.185714721679688, 225.65333557128906, 213.13829040527344, 394.244140625, 481.6141052246094, 169.39431762695312, 505.8438415527344, -61.66120147705078, 190.71237182617188, 507.74810791015625, 85.74156188964844, 748.166015625, 67.05842590332031, 720.7822265625, 104.1129150390625, 241.5266571044922, 159.8333282470703, 206.5155029296875, 113.95886993408203, 212.64700317382812, 775.9063720703125, 171.28524780273438, 186.02740478515625, 460.43756103515625, 41.89012145996094, -43.71177291870117, 698.8671875, 733.8179321289062, 325.4954528808594, 336.9125061035156, 628.2491455078125, 144.58193969726562, 279.8100280761719, 1021.55615234375, 406.2196044921875, 487.36492919921875, 92.07295227050781, 654.9255981445312, 116.92437744140625, 297.04632568359375, 276.19366455078125, 215.3876953125, 183.72653198242188, -77.60086059570312, 337.4168395996094, 299.1721496582031, 178.20809936523438, 322.82635498046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000402.npy"}
|
|
{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 227.947021484375, "std": 232.1578826904297, "min": -324.13092041015625, "p10": -36.29735317230224, "median": 200.9663314819336, "p90": 534.9727172851562, "max": 791.7926025390625, "pos_frac": 0.84375, "sample": [199.55838012695312, 80.68436431884766, 78.89112091064453, 187.34539794921875, 324.6766662597656, 81.73286437988281, 204.54217529296875, 316.7884521484375, -63.86516571044922, -185.64723205566406, 136.344482421875, 41.288482666015625, -324.13092041015625, 488.9031677246094, 139.7544403076172, 687.8797607421875, 190.4075469970703, 160.33168029785156, 410.0072021484375, 283.17437744140625, 272.575439453125, 110.99134826660156, 592.3250732421875, 83.8044662475586, -41.32769012451172, 540.6890258789062, 443.82550048828125, 442.63812255859375, 202.37428283691406, 791.7926025390625, -22.142833709716797, 313.7842712402344, 521.3700561523438, 116.5277099609375, 94.29589080810547, 344.9228820800781, -143.92782592773438, 274.26727294921875, 540.5178833007812, 388.06573486328125, -211.21482849121094, 217.15219116210938, 677.963623046875, -2.398630142211914, 391.7561340332031, 99.40794372558594, 385.7114562988281, 522.0339965820312, 278.3569641113281, 772.722900390625, 235.44395446777344, 345.1575012207031, 245.49468994140625, 131.40028381347656, 42.25921630859375, 54.36537170410156, -24.559900283813477, 341.0229797363281, 38.27763366699219, 194.1053466796875, 297.406982421875, 148.500732421875, -85.86328125, 188.06900024414062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000403.npy"}
|
|
{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 223.13433837890625, "std": 284.8643493652344, "min": -346.0570983886719, "p10": -166.295166015625, "median": 222.3733139038086, "p90": 618.0854675292969, "max": 963.732666015625, "pos_frac": 0.78125, "sample": [-175.92100524902344, 210.6767120361328, 44.744712829589844, 425.4897155761719, 425.18017578125, 118.79067993164062, 85.12263488769531, 725.106689453125, 209.684814453125, 679.877685546875, 963.732666015625, 168.42831420898438, 273.0947265625, 624.5335693359375, -190.03024291992188, -223.13705444335938, 616.8892211914062, 135.09439086914062, -222.4076690673828, 83.9707260131836, -64.74600219726562, 39.485694885253906, 61.62052917480469, 429.3887939453125, 374.5836181640625, 276.7065734863281, -272.2789611816406, 416.7774963378906, 261.7445068359375, 295.03192138671875, 465.1726989746094, -169.60401916503906, 342.17730712890625, 173.82980346679688, 466.8868713378906, 116.84754180908203, -118.85783386230469, 200.6609344482422, 234.06991577148438, 352.2813415527344, -346.0570983886719, 363.0311279296875, 88.47367858886719, 606.6990966796875, 127.56967163085938, -158.5745086669922, -82.53924560546875, 370.9738464355469, 479.5903015136719, -142.1398468017578, 276.3263854980469, 5.581443786621094, 351.3475341796875, 300.95660400390625, 424.8944396972656, 705.8892822265625, 237.21617126464844, 17.647964477539062, -2.5033512115478516, -112.4951171875, 800.8816528320312, 183.04684448242188, 618.59814453125, 305.482177734375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000404.npy"}
|
|
{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 186.25479125976562, "std": 276.9902648925781, "min": -372.0523376464844, "p10": -158.25507049560545, "median": 147.7390594482422, "p90": 586.6420715332031, "max": 833.593505859375, "pos_frac": 0.734375, "sample": [585.7550659179688, 544.2369384765625, 184.063232421875, -137.21034240722656, -213.14381408691406, -14.118118286132812, -237.76744079589844, 145.8836669921875, 394.1282958984375, -250.4229736328125, 50.54906463623047, 792.2101440429688, 464.0929870605469, 101.71771240234375, 34.433815002441406, 779.9835205078125, -26.886110305786133, -63.285438537597656, 186.291259765625, -126.92616271972656, 95.68643951416016, 200.2340545654297, 126.93177795410156, 417.92535400390625, 135.58984375, 124.18608856201172, 115.53916931152344, 365.7596740722656, -208.4749755859375, -50.771507263183594, 330.1586608886719, -61.92664337158203, 304.60009765625, 95.64815521240234, 346.71319580078125, 15.325492858886719, 203.72747802734375, -176.11033630371094, 41.05976104736328, 4.221195220947266, 483.7853698730469, 265.6217956542969, 595.969482421875, 339.7604675292969, 24.68722152709961, 177.280029296875, 470.2958068847656, -47.01019287109375, 149.59445190429688, 194.649169921875, 291.333251953125, -143.47984313964844, 169.89759826660156, 717.1436767578125, 93.6763687133789, -372.0523376464844, 315.30511474609375, 315.9173889160156, -14.944761276245117, 833.593505859375, 630.0186767578125, -164.58731079101562, 387.221435546875, 587.022216796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000405.npy"}
|
|
{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 230.2542266845703, "std": 266.05963134765625, "min": -250.84197998046875, "p10": -74.32944641113276, "median": 206.8630599975586, "p90": 583.2563293457033, "max": 912.6128540039062, "pos_frac": 0.8125, "sample": [325.8960876464844, 103.70832824707031, 173.34396362304688, 119.32267761230469, 260.60693359375, 175.7281494140625, 126.94125366210938, -250.84197998046875, 903.5189208984375, 131.05471801757812, 400.68426513671875, 197.9562530517578, 868.5722045898438, 526.9221801757812, 279.73468017578125, -3.274463653564453, 426.1416931152344, 277.9812316894531, 324.7137756347656, -103.13373565673828, 659.36181640625, 340.7323303222656, 316.04754638671875, -218.6126251220703, 97.9830093383789, 95.83262634277344, -110.11581420898438, 33.813716888427734, 354.8009033203125, -30.285308837890625, 50.11680603027344, 217.72532653808594, 321.8641052246094, -236.99598693847656, -93.20550537109375, 410.57220458984375, 667.4273681640625, 175.01641845703125, 113.25988006591797, 3.6722278594970703, 215.76986694335938, 261.36236572265625, 405.400634765625, -28.497379302978516, -19.853729248046875, 60.476234436035156, 607.3995361328125, 264.4542236328125, 26.30999755859375, 157.34878540039062, 520.047607421875, 510.459716796875, 263.59393310546875, 9.697372436523438, -197.0755157470703, 1.7662010192871094, 743.6924438476562, 359.2381591796875, 912.6128540039062, 333.33233642578125, 330.12896728515625, 440.3919677734375, 125.54460144042969, -1.9183578491210938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000406.npy"}
|
|
{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 157.9940643310547, "std": 243.44769287109375, "min": -597.6612548828125, "p10": -147.50442657470703, "median": 129.8207664489746, "p90": 476.1306762695313, "max": 640.3458251953125, "pos_frac": 0.78125, "sample": [-220.46031188964844, 467.62933349609375, 315.63079833984375, -236.10618591308594, -204.25062561035156, -22.00749969482422, 316.6304931640625, -215.45278930664062, -141.65304565429688, 101.68128967285156, 398.1136474609375, 177.9893341064453, 235.8656463623047, 585.5848388671875, 517.8004150390625, 56.88218688964844, 39.699405670166016, 427.38482666015625, 95.35669708251953, 6.0687713623046875, 300.6032409667969, 54.35418701171875, 167.26307678222656, 91.81442260742188, 164.90272521972656, 358.5311279296875, 215.99217224121094, 64.11727905273438, 127.31837463378906, 132.3087921142578, -128.3367919921875, 223.01132202148438, 22.957054138183594, -23.89051055908203, 30.06285858154297, 119.65577697753906, 345.2276306152344, 640.3458251953125, 126.52864074707031, -597.6612548828125, 449.01519775390625, 326.61590576171875, 213.3925018310547, 10.040870666503906, 479.77410888671875, 92.17655944824219, 27.44140625, 590.2855834960938, 373.08660888671875, 215.83843994140625, 518.2562866210938, -107.46656036376953, -29.324504852294922, 526.161376953125, -37.123634338378906, 127.3327407836914, 132.4416961669922, 443.5914611816406, 44.60865783691406, -251.79319763183594, 427.1611022949219, 275.59326171875, -150.0121612548828, 277.03289794921875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000407.npy"}
|
|
{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 159.5050506591797, "std": 194.93576049804688, "min": -264.26983642578125, "p10": -47.75703659057617, "median": 155.55162811279297, "p90": 409.7870208740235, "max": 630.450439453125, "pos_frac": 0.796875, "sample": [187.5697784423828, 557.2583618164062, 630.450439453125, 108.25885009765625, 283.81524658203125, 294.3096923828125, -17.044097900390625, 143.70193481445312, 31.5120849609375, -110.64039611816406, -22.701765060424805, -173.2135467529297, 48.797706604003906, 31.349395751953125, 15.531776428222656, -166.14476013183594, 129.52598571777344, 275.6514587402344, 133.69833374023438, 106.14692687988281, 158.02809143066406, 397.3128662109375, -5.2376708984375, 304.2231140136719, -218.7218475341797, 147.6131591796875, -8.485208511352539, 170.21392822265625, 69.73192596435547, 455.59368896484375, 75.47747039794922, 153.07516479492188, 415.1330871582031, 16.457733154296875, -264.26983642578125, -48.445648193359375, 277.0877685546875, 227.3251495361328, 77.57633972167969, 227.64892578125, 303.0918884277344, 291.3121032714844, 231.3940887451172, 298.45166015625, 209.6051788330078, -215.23300170898438, 459.6590270996094, -46.15027618408203, 66.92939758300781, 21.184722900390625, 20.864900588989258, 285.9617919921875, 182.91268920898438, -33.08563232421875, 184.58230590820312, 263.5251159667969, 25.17945098876953, 379.4942626953125, 240.80030822753906, 284.76995849609375, 525.6065673828125, 543.8682250976562, 347.8602600097656, 220.56704711914062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000408.npy"}
|
|
{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 189.9278106689453, "std": 209.09889221191406, "min": -249.4674072265625, "p10": -32.203637695312494, "median": 158.73171997070312, "p90": 473.3505584716798, "max": 691.3761596679688, "pos_frac": 0.859375, "sample": [-18.300140380859375, -249.4674072265625, 282.94744873046875, 218.97415161132812, 158.80340576171875, 187.1070556640625, 73.55644989013672, 37.01702880859375, 107.06768798828125, 155.69020080566406, 482.8855285644531, 409.6105651855469, -78.80008697509766, 101.22750854492188, 383.5063781738281, 160.24273681640625, 200.603515625, 226.98997497558594, 216.90438842773438, 633.865234375, 207.1949462890625, 17.117591857910156, 338.2750244140625, 82.01255798339844, 507.48980712890625, -107.75096893310547, 71.05120086669922, 32.6640625, 63.107391357421875, 149.33299255371094, 237.3022918701172, 679.3452758789062, 376.4671630859375, -165.53768920898438, 83.47269439697266, 616.2049560546875, -25.136741638183594, 86.98326110839844, -168.5099334716797, 56.817237854003906, 410.0926818847656, 19.68390655517578, 413.9627685546875, 74.95167541503906, 258.36749267578125, 125.53244018554688, 71.80913543701172, 94.47290802001953, 691.3761596679688, 378.90460205078125, -35.23230743408203, 160.29632568359375, -77.77900695800781, 204.0771942138672, 222.1898956298828, 451.102294921875, 348.12677001953125, 239.53057861328125, 192.88949584960938, 99.09275817871094, 670.2127685546875, 89.74757385253906, 64.97671508789062, 158.6600341796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000409.npy"}
|
|
{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 189.38497924804688, "std": 208.2528533935547, "min": -387.8423767089844, "p10": -40.61204261779784, "median": 211.61610412597656, "p90": 475.5162658691406, "max": 675.482421875, "pos_frac": 0.84375, "sample": [274.85302734375, 209.09934997558594, -50.273529052734375, 65.05304718017578, 331.0500793457031, 254.9462890625, 343.04071044921875, 487.9872131347656, 299.87237548828125, 6.260190963745117, 108.13511657714844, 177.9227294921875, 15.332504272460938, 348.49993896484375, -44.676490783691406, -125.859130859375, 488.0325012207031, -300.06866455078125, 1.9673728942871094, 251.7328643798828, 297.5959777832031, 518.443115234375, 10.507369995117188, 45.22456359863281, 467.6058349609375, 511.0050048828125, 221.3948974609375, -31.12833023071289, 291.95361328125, 675.482421875, 293.6956481933594, 29.082149505615234, 264.1568298339844, 42.51690673828125, 254.4409942626953, 75.29525756835938, -17.608007431030273, -5.298849105834961, 121.33773803710938, -138.7545166015625, 214.1328582763672, 285.1152648925781, 132.90707397460938, 354.96807861328125, 168.8888397216797, -387.8423767089844, 120.24465942382812, 233.86940002441406, 477.26959228515625, 252.5954132080078, 99.88162231445312, 187.71957397460938, 294.841552734375, 339.6415100097656, 204.9054718017578, 601.3353881835938, 224.4529571533203, 370.3226318359375, -221.51512145996094, 6.6387786865234375, 208.53158569335938, 142.51629638671875, 471.4251708984375, 267.9370422363281], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000410.npy"}
|
|
{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 194.54104614257812, "std": 230.54824829101562, "min": -358.1988525390625, "p10": -58.06496543884277, "median": 143.44261932373047, "p90": 472.58101501464853, "max": 948.031982421875, "pos_frac": 0.828125, "sample": [245.4443359375, 82.33623504638672, 62.44715118408203, 86.8753890991211, 365.60614013671875, 61.58409881591797, 95.95721435546875, 186.5091552734375, 252.3437957763672, 323.65179443359375, 180.1900634765625, 38.49053192138672, 196.9022216796875, -96.695556640625, 32.21855926513672, -20.049104690551758, 289.9855041503906, 332.5563659667969, 118.34515380859375, 333.5200500488281, 665.1157836914062, 295.5740661621094, 57.20819091796875, -17.470056533813477, 85.1272964477539, 101.73060607910156, 79.95124816894531, 165.66490173339844, 152.714111328125, 801.01953125, 70.78335571289062, 296.0329284667969, 598.25439453125, -61.24408721923828, 390.43402099609375, 38.74284362792969, 270.7666931152344, 383.04888916015625, 507.70721435546875, 127.0305404663086, 90.26285552978516, 352.98486328125, 254.75247192382812, 247.31039428710938, 115.9786148071289, 309.3748474121094, 31.145450592041016, -358.1988525390625, 948.031982421875, 361.97088623046875, 134.17112731933594, 326.68072509765625, 423.39434814453125, 451.0885925292969, 481.79205322265625, -144.73532104492188, 50.223289489746094, -143.43931579589844, 118.96630096435547, -89.36521911621094, 533.3880615234375, -139.07412719726562, -31.842147827148438, -50.64701461791992], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000411.npy"}
|
|
{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 194.49317932128906, "std": 223.86349487304688, "min": -273.4586181640625, "p10": -82.97317123413085, "median": 212.02113342285156, "p90": 432.8961181640626, "max": 797.1581420898438, "pos_frac": 0.78125, "sample": [-135.38946533203125, 199.6285400390625, 335.43853759765625, 321.9110107421875, 336.8876953125, -91.53292083740234, 575.7338256835938, -27.96076202392578, 61.162811279296875, 343.1707458496094, 57.97993469238281, 226.9356689453125, 103.44699096679688, 189.29385375976562, -24.26304817199707, 334.5126953125, 383.50128173828125, 55.49158477783203, -250.63450622558594, 235.358642578125, 134.37066650390625, 79.67105865478516, 492.3990173339844, 230.52980041503906, 70.22846984863281, 261.5388488769531, 18.661041259765625, 259.16424560546875, -90.8099594116211, 149.98031616210938, -2.9406070709228516, 47.779075622558594, 224.41372680664062, 234.08026123046875, 166.69639587402344, 51.27540588378906, -273.4586181640625, -49.5037727355957, 78.65535736083984, 677.99853515625, 70.33038330078125, -133.58836364746094, 395.22467041015625, 404.2747802734375, 565.32421875, -64.68733215332031, 291.2159423828125, 668.0720825195312, 73.7674560546875, 324.91961669921875, 313.173828125, -28.132661819458008, 111.13938903808594, 402.9833984375, 406.5657958984375, 797.1581420898438, -34.617252349853516, 240.13259887695312, 363.4075927734375, -135.35675048828125, 444.1805419921875, 240.48373413085938, 405.5713806152344, 334.6173400878906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000412.npy"}
|
|
{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 177.179931640625, "std": 203.33883666992188, "min": -445.16522216796875, "p10": -63.875746154785155, "median": 179.96045684814453, "p90": 388.29703979492194, "max": 721.0517578125, "pos_frac": 0.84375, "sample": [34.8587646484375, 7.697620391845703, 65.97821044921875, 298.2037048339844, 188.38815307617188, -78.60643005371094, 91.62322998046875, 77.55248260498047, 87.22602081298828, 162.87527465820312, 360.3019714355469, 721.0517578125, 38.55217742919922, 343.768310546875, -13.062705993652344, -62.86598205566406, 268.31805419921875, 67.94921875, 13.723434448242188, 550.1627197265625, 115.53828430175781, 368.8348388671875, 162.95278930664062, -445.16522216796875, 44.1322021484375, 214.90150451660156, 248.44935607910156, 140.65618896484375, 422.97637939453125, 704.1248779296875, 165.0211639404297, 237.8254852294922, 424.883056640625, 324.65509033203125, -79.08807373046875, 375.14892578125, 270.6975402832031, 71.046875, 271.74530029296875, 322.3209228515625, 104.82341003417969, 68.99577331542969, 401.2601013183594, -64.30850219726562, 298.64483642578125, 340.46771240234375, 182.13241577148438, -142.22366333007812, -139.07728576660156, -278.169189453125, 188.3989715576172, 343.1062927246094, 298.59881591796875, 393.93194580078125, 34.66950607299805, 285.7470397949219, 86.65347290039062, 364.6757507324219, 186.77122497558594, 187.220947265625, 177.7884979248047, 160.8633575439453, 285.8580322265625, -12.668121337890625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000413.npy"}
|
|
{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 228.78590393066406, "std": 222.24481201171875, "min": -373.8294677734375, "p10": -44.23711719512938, "median": 228.76283264160156, "p90": 546.7408264160157, "max": 757.9105224609375, "pos_frac": 0.859375, "sample": [357.97625732421875, 244.23118591308594, 387.7889404296875, 151.4302520751953, 105.85525512695312, 689.6294555664062, 241.185546875, -373.8294677734375, 77.26972961425781, 241.1031494140625, 238.9613037109375, 222.767333984375, 458.79620361328125, 757.9105224609375, 130.25286865234375, -133.20936584472656, 258.316650390625, 103.5654296875, -30.37984275817871, 104.95630645751953, 249.59307861328125, 348.4549560546875, -84.78672790527344, 234.75833129882812, 359.05377197265625, 183.7614288330078, 57.605499267578125, 20.777021408081055, 284.46380615234375, 79.92762756347656, 446.6684875488281, 424.85736083984375, -55.40092086791992, 256.14404296875, 563.5926513671875, 117.93186950683594, 263.6416015625, -50.17594909667969, -69.97010803222656, 315.8116149902344, 219.56214904785156, 170.80328369140625, 527.251953125, -138.71759033203125, 48.607818603515625, 457.5429992675781, 30.115966796875, 555.0932006835938, 129.05471801757812, 82.65858459472656, 274.2060546875, 218.48973083496094, 110.85222625732422, 158.6148223876953, 197.76873779296875, 255.56605529785156, 648.9632568359375, -14.576324462890625, 394.60357666015625, 620.0462646484375, 749.6893920898438, 301.41754150390625, 123.73463439941406, 339.6614685058594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000414.npy"}
|
|
{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 169.42327880859375, "std": 212.3540496826172, "min": -397.9462585449219, "p10": -59.819906997680654, "median": 145.1109848022461, "p90": 448.1475372314454, "max": 743.735595703125, "pos_frac": 0.78125, "sample": [252.41378784179688, -3.64739990234375, 340.62432861328125, 593.0684204101562, -51.50444030761719, 576.6832275390625, 398.1761169433594, 172.30531311035156, -14.183219909667969, 208.87034606933594, 390.50738525390625, 244.75405883789062, 377.974853515625, 345.8207702636719, 64.14054107666016, -63.3836784362793, 203.83114624023438, -27.827056884765625, 259.21282958984375, 73.93455505371094, 6.851736068725586, 102.02650451660156, 14.677978515625, 460.0048522949219, -72.08956146240234, 84.38211822509766, 560.3760986328125, 13.333641052246094, 157.90277099609375, 295.90106201171875, -48.45146179199219, 275.1023254394531, 189.15679931640625, 64.950927734375, 743.735595703125, -165.0421600341797, -82.05076599121094, 134.25167846679688, 361.2936096191406, 502.5325622558594, 57.6763801574707, 319.0828857421875, 112.2099609375, 101.39129638671875, 69.45294189453125, 160.0287628173828, -47.871620178222656, 148.98460388183594, 420.48046875, -110.63321685791016, 92.92266845703125, 391.1728515625, 205.43338012695312, 27.580078125, 141.23736572265625, -7.32722282409668, 194.61386108398438, 7.662811279296875, 285.92242431640625, -166.11367797851562, 520.5546875, 101.44770812988281, -397.9462585449219, 274.5083923339844], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000415.npy"}
|
|
{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 252.76690673828125, "std": 218.10922241210938, "min": -122.87994384765625, "p10": -35.5440860748291, "median": 255.85347747802734, "p90": 531.0542297363281, "max": 848.289306640625, "pos_frac": 0.84375, "sample": [518.3408813476562, 156.0540771484375, 111.33740234375, 363.19830322265625, -89.69014739990234, 533.9501342773438, 191.51873779296875, 150.09130859375, 340.5345458984375, 655.0191040039062, -6.910621643066406, 18.115081787109375, 54.212745666503906, 269.784912109375, 236.63882446289062, 396.2289123535156, 302.29718017578125, 30.968467712402344, 464.457763671875, 121.52049255371094, 319.6534729003906, 301.4198913574219, 45.5760612487793, 194.75466918945312, 459.3605041503906, 184.95143127441406, 292.8960266113281, 575.4111938476562, 29.553375244140625, 534.6102905273438, 212.76063537597656, 406.013916015625, 848.289306640625, -103.4140396118164, -122.87994384765625, 77.2596664428711, 492.8648376464844, 370.482666015625, -35.90656280517578, 524.297119140625, 60.77587127685547, 733.823974609375, 221.48196411132812, 207.4054718017578, 365.5879821777344, 431.2591552734375, 247.88914489746094, 479.78076171875, 163.4816131591797, 79.15351104736328, -41.33052444458008, 263.81781005859375, -62.101348876953125, 481.22613525390625, 323.25054931640625, -38.026458740234375, 300.26409912109375, -34.698307037353516, 312.06536865234375, -23.270614624023438, 102.04515838623047, 318.1862487792969, 315.89312744140625, 543.49853515625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000416.npy"}
|
|
{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 216.41831970214844, "std": 188.1266632080078, "min": -128.67955017089844, "p10": -5.429085731506334, "median": 221.2877426147461, "p90": 482.8934783935548, "max": 699.5181884765625, "pos_frac": 0.890625, "sample": [108.35616302490234, 65.32594299316406, 20.5176944732666, 229.2367401123047, 11.21975326538086, 430.8229675292969, 7.3921356201171875, 16.420961380004883, -68.65523529052734, 604.13916015625, 27.37387466430664, 438.6618347167969, 224.0424041748047, 83.044677734375, 225.39939880371094, 29.246444702148438, -67.64610290527344, 523.9238891601562, 595.878173828125, 75.29135131835938, 531.2679443359375, 245.97732543945312, 493.329345703125, 76.6680908203125, 458.5431213378906, 699.5181884765625, 331.1810607910156, 210.2135009765625, 238.19223022460938, 302.479736328125, 108.4917984008789, 203.76597595214844, -10.923894882202148, 116.24549102783203, 269.0068664550781, 397.5671081542969, 97.59066772460938, 242.46983337402344, 339.5673828125, 263.13861083984375, 77.17071533203125, 295.0921936035156, 215.4810028076172, 302.6110534667969, -128.67955017089844, 215.846435546875, 151.72885131835938, 118.59014129638672, 271.1202392578125, 239.8548126220703, 388.78326416015625, -118.15213775634766, 186.42031860351562, 224.467529296875, 106.60083770751953, -109.1568374633789, 186.97048950195312, 221.94677734375, 531.9122314453125, 335.3466491699219, 434.319091796875, 220.6287078857422, 314.2974853515625, -26.71051788330078], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000417.npy"}
|
|
{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 220.99835205078125, "std": 266.65472412109375, "min": -323.30816650390625, "p10": -97.3584762573242, "median": 190.04132843017578, "p90": 537.6430328369141, "max": 980.2803955078125, "pos_frac": 0.796875, "sample": [157.96588134765625, -111.66166687011719, 262.17364501953125, 1.1331596374511719, 125.08213806152344, 752.380126953125, 47.60310363769531, 104.74588012695312, 491.53216552734375, 405.3607482910156, 74.38397216796875, -72.40203857421875, 223.27017211914062, 96.70832061767578, 776.8406982421875, 389.4229736328125, 358.37738037109375, -108.05409240722656, 382.7867126464844, 180.990966796875, -126.70074462890625, 507.0856018066406, -27.004852294921875, 402.4052429199219, 186.32456970214844, 67.10090637207031, 217.6014404296875, 438.646484375, 363.41888427734375, 193.75808715820312, 463.8416748046875, 54.89137268066406, 43.57140350341797, 676.5616455078125, -323.30816650390625, -27.055015563964844, -136.93020629882812, 233.02496337890625, 239.0397186279297, 403.741455078125, 980.2803955078125, -39.665077209472656, 95.86282348632812, 106.00772094726562, 257.0168151855469, 205.77830505371094, 121.58928680419922, 132.98780822753906, -242.02098083496094, -190.7259979248047, 306.01104736328125, 127.94773864746094, 489.2033996582031, -33.12168884277344, 550.7390747070312, 245.29379272460938, 476.84796142578125, 782.014892578125, 123.66129302978516, 6.089458465576172, 337.9723205566406, 714.06201171875, 228.96829223632812, -27.561233520507812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000418.npy"}
|
|
{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 236.74319458007812, "std": 230.7321014404297, "min": -319.0414123535156, "p10": -39.004879760742185, "median": 236.7195587158203, "p90": 543.5375549316407, "max": 782.1859130859375, "pos_frac": 0.859375, "sample": [609.2393798828125, -319.0414123535156, 246.63015747070312, 402.55731201171875, 174.91334533691406, 432.6239318847656, -93.02578735351562, -29.034534454345703, 10.6695556640625, 523.6129760742188, 552.07666015625, 159.83197021484375, 266.69012451171875, 437.6234130859375, 317.9452819824219, 324.1164855957031, 122.66732788085938, 136.00363159179688, -256.9541015625, 83.3996810913086, 65.61896514892578, 582.6221923828125, -68.37913513183594, 226.8089599609375, 281.00799560546875, -185.26089477539062, 110.1024169921875, 109.61795806884766, 471.041259765625, 732.2479858398438, 248.67529296875, 115.09326171875, 322.95367431640625, 335.75042724609375, 255.62559509277344, 143.60220336914062, 139.67030334472656, 75.4720458984375, 480.9466857910156, 271.8214111328125, 485.8298645019531, 163.62567138671875, 520.6832275390625, 126.58845520019531, 61.749420166015625, 201.0679168701172, 42.76385498046875, -40.37841796875, 599.4102783203125, 411.94207763671875, 491.1545715332031, 252.854248046875, -35.799957275390625, 782.1859130859375, 357.6530456542969, 371.9252624511719, 377.36297607421875, 141.849609375, 84.22708129882812, 561.8253784179688, -42.17332458496094, 24.088043212890625, 122.09473419189453, 271.45068359375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000419.npy"}
|
|
{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 164.3126678466797, "std": 269.5140686035156, "min": -466.74884033203125, "p10": -135.44315490722656, "median": 114.64263534545898, "p90": 481.51395263671884, "max": 910.3806762695312, "pos_frac": 0.71875, "sample": [-161.64019775390625, 176.851318359375, 144.17190551757812, 97.47116088867188, 392.1893615722656, -92.2716293334961, -26.24138641357422, -12.27899169921875, 233.88816833496094, 520.3335571289062, -32.30207824707031, 53.02674102783203, -32.87431716918945, 96.87651062011719, -56.445556640625, 74.98282623291016, 406.5589599609375, 194.305419921875, 74.19114685058594, 182.28131103515625, 12.622739791870117, 280.52178955078125, -158.00271606445312, 361.8348083496094, 31.25958251953125, 147.10696411132812, 110.64793395996094, 439.0880126953125, 28.82268524169922, -25.831298828125, 434.0601806640625, 374.93035888671875, 362.17132568359375, -137.7068328857422, 118.3735580444336, 225.56985473632812, -130.16123962402344, 15.922828674316406, 492.1769714355469, 599.964599609375, -47.07170867919922, -212.19781494140625, 505.56988525390625, 350.28778076171875, 421.6300048828125, 127.79544067382812, 456.6335754394531, 396.3087463378906, 320.97576904296875, 273.1568908691406, 68.40636444091797, -420.1792907714844, 117.08457946777344, 67.00662231445312, -11.619892120361328, 910.3806762695312, -466.74884033203125, 56.72468948364258, 135.36265563964844, 112.20069122314453, 864.6216430664062, -19.615684509277344, -141.96160888671875, 834.813720703125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000420.npy"}
|
|
{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 165.87652587890625, "std": 211.7760772705078, "min": -426.0586853027344, "p10": -129.91998443603512, "median": 188.66835021972656, "p90": 444.50881958007824, "max": 629.16796875, "pos_frac": 0.78125, "sample": [251.7240753173828, 79.01193237304688, 148.86553955078125, -140.298095703125, 236.1716766357422, 340.922119140625, 552.4282836914062, -24.01715087890625, 23.288068771362305, 42.41829299926758, 196.6624755859375, -36.16403579711914, 464.6872253417969, 466.19769287109375, 359.3756103515625, 396.5378112792969, 260.555419921875, -146.03553771972656, 176.721923828125, 180.67422485351562, 553.334716796875, 393.47857666015625, 145.385009765625, -168.30429077148438, 218.45155334472656, 87.52348327636719, 323.83660888671875, 233.88497924804688, 69.7520523071289, 222.8987274169922, 420.9051818847656, 273.1704406738281, 497.3577880859375, 214.16732788085938, 234.09963989257812, -243.079833984375, 273.6383056640625, 224.9775390625, -271.84320068359375, 454.6246643066406, 96.31433868408203, -18.198951721191406, 241.7189178466797, 629.16796875, 245.37026977539062, 261.921630859375, 162.1058807373047, 36.44172668457031, 15.921792984008789, 318.306884765625, 101.17570495605469, -46.26848602294922, 283.336181640625, -426.0586853027344, -160.8569793701172, 106.20652770996094, 140.89105224609375, -63.547000885009766, -71.33428955078125, 222.35055541992188, 173.30445861816406, -105.70439147949219, 167.46566772460938, 318.08013916015625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000421.npy"}
|
|
{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 189.65658569335938, "std": 235.8077850341797, "min": -404.20245361328125, "p10": -107.162621307373, "median": 177.32391357421875, "p90": 494.5611846923828, "max": 801.3721313476562, "pos_frac": 0.828125, "sample": [132.91221618652344, 515.3427124023438, 641.5524291992188, 334.43572998046875, 279.7293701171875, -129.3418731689453, -404.20245361328125, 505.60791015625, 151.4800567626953, 801.3721313476562, -136.92747497558594, 192.46405029296875, 119.56974792480469, 42.9490966796875, 385.6619873046875, 128.69088745117188, 105.0771713256836, -55.411033630371094, 17.549625396728516, 188.5149383544922, 114.19756317138672, 571.7631225585938, 257.51702880859375, 36.08453369140625, 439.7283020019531, 194.33392333984375, 145.00473022460938, 492.9496765136719, 119.28450775146484, 447.3702392578125, 627.1372680664062, 307.83563232421875, 207.89443969726562, 170.2355499267578, 134.19857788085938, 281.62115478515625, 332.783935546875, -304.39239501953125, 145.11538696289062, -9.69586181640625, -27.017765045166016, 290.6225280761719, 171.32516479492188, -371.4004211425781, -49.812801361083984, 400.08935546875, 337.987060546875, 182.66322326660156, 495.2518310546875, 37.45356750488281, 379.0815734863281, 171.98460388183594, 11.43994140625, 51.73027420043945, 296.76068115234375, 293.9188537597656, 270.78912353515625, -180.06642150878906, 285.2127685546875, 366.4110412597656, -184.67990112304688, 147.18824768066406, 227.02532958984375, 6.0691680908203125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000422.npy"}
|
|
{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 203.8914031982422, "std": 213.2364501953125, "min": -327.33489990234375, "p10": -56.19216690063476, "median": 181.00396728515625, "p90": 494.8703735351564, "max": 741.708984375, "pos_frac": 0.84375, "sample": [134.22817993164062, 84.18901824951172, 78.38597106933594, 123.12835693359375, -78.5777587890625, -175.4471435546875, 647.2667846679688, 115.2833251953125, 445.96112060546875, 291.1219177246094, 463.2737731933594, -59.32732391357422, 414.3502502441406, 178.97445678710938, 369.31768798828125, -13.585926055908203, 575.017333984375, 582.190673828125, 558.9287109375, 508.4117736816406, 9.268310546875, 59.376220703125, 353.5816955566406, 117.2402114868164, 377.79937744140625, 183.03347778320312, 39.009132385253906, 74.140625, -327.33489990234375, -141.33749389648438, 257.4180908203125, 30.638385772705078, 189.21697998046875, 299.9486083984375, 512.1282958984375, 219.78077697753906, 41.67430114746094, 382.31011962890625, -82.45912170410156, 88.34445190429688, 203.4315185546875, 388.0283203125, 197.19143676757812, 167.394287109375, 741.708984375, 188.65550231933594, 276.3961486816406, -39.734527587890625, 300.5389404296875, 146.5115203857422, 1.0259819030761719, 211.28257751464844, 342.93487548828125, 300.6066589355469, 165.921630859375, 226.5478057861328, -48.876800537109375, 159.1002197265625, 140.63946533203125, -105.36929321289062, 426.12908935546875, 114.33707427978516, 160.88284301757812, 456.8971862792969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000423.npy"}
|
|
{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 198.87368774414062, "std": 238.04306030273438, "min": -264.64227294921875, "p10": -73.11085739135741, "median": 178.150634765625, "p90": 426.22846679687507, "max": 932.4832763671875, "pos_frac": 0.8125, "sample": [-161.6384735107422, 299.77545166015625, 91.74314880371094, -78.62040710449219, -43.65087890625, 431.74847412109375, 354.41131591796875, 238.83863830566406, 20.16516876220703, -195.2027587890625, 283.5574951171875, 190.35336303710938, 91.59800720214844, -74.3214340209961, 268.0424499511719, 635.6622924804688, 301.4921569824219, 570.8814086914062, 106.30168151855469, 361.2109680175781, 389.1277770996094, 47.75407409667969, 367.72283935546875, 413.34844970703125, 311.53173828125, -100.71442413330078, 66.48519134521484, 166.54751586914062, -123.87521362304688, 14.12484359741211, 44.49547576904297, 932.4832763671875, -31.044986724853516, 173.61618041992188, 369.108154296875, -12.976806640625, 291.2747497558594, -70.28617858886719, 182.68508911132812, 213.81707763671875, -12.052087783813477, 112.12488555908203, -264.64227294921875, 49.426578521728516, 194.79452514648438, 186.59536743164062, 43.677757263183594, 81.01880645751953, 198.057861328125, 854.6597900390625, 101.04285430908203, 358.91644287109375, 344.2461242675781, 213.08871459960938, 308.26593017578125, 71.423828125, 643.1575317382812, 99.40328216552734, 138.35247802734375, 788.02734375, 342.7574157714844, 272.7535705566406, 142.86524963378906, 122.38107299804688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000424.npy"}
|
|
{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 166.87106323242188, "std": 211.56201171875, "min": -419.59173583984375, "p10": -75.08173675537108, "median": 169.96389770507812, "p90": 403.1041748046875, "max": 564.6892700195312, "pos_frac": 0.828125, "sample": [-62.4739990234375, 5.329648971557617, 354.6487121582031, -288.68145751953125, -289.30682373046875, 204.71209716796875, 148.95042419433594, 251.310546875, 68.81655883789062, 265.96807861328125, 51.06586456298828, 402.94464111328125, -419.59173583984375, -80.20013427734375, 281.7989196777344, 72.04513549804688, 89.4448013305664, 31.763092041015625, 360.18402099609375, 71.80702209472656, 306.9269104003906, 150.63421630859375, 100.60086059570312, 330.58746337890625, 477.1348876953125, -16.00579071044922, 340.0557861328125, 33.09620666503906, 24.395793914794922, 564.6892700195312, 130.9285888671875, 436.18157958984375, 377.1375732421875, -63.13880920410156, 204.49925231933594, 161.88327026367188, 403.17254638671875, 155.4408416748047, 110.64700317382812, 545.9454345703125, -217.43338012695312, 310.023681640625, 76.86241149902344, -184.42630004882812, 385.4221496582031, 280.35821533203125, 184.64125061035156, 178.04452514648438, 18.574462890625, 374.2672119140625, 324.88555908203125, 219.10769653320312, -60.50132751464844, 6.7066650390625, 151.0438232421875, 238.8136444091797, 401.47021484375, 560.639404296875, 449.2142028808594, 334.0516357421875, 214.49456787109375, -136.53494262695312, 197.11154174804688, 77.5626449584961], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000425.npy"}
|
|
{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 208.45101928710938, "std": 223.32330322265625, "min": -416.0245361328125, "p10": -27.23798542022705, "median": 152.53746032714844, "p90": 520.8641693115235, "max": 663.6458740234375, "pos_frac": 0.859375, "sample": [232.6182098388672, 37.628944396972656, 59.820228576660156, 120.19760131835938, 193.01553344726562, 67.31373596191406, 86.86309814453125, 205.3391876220703, 587.99560546875, 165.36224365234375, 136.21324157714844, 136.87818908691406, 152.27194213867188, -26.537893295288086, 509.5519104003906, -37.533329010009766, 110.15632629394531, 152.802978515625, 369.54815673828125, 116.01870727539062, -90.4620132446289, 612.5628051757812, -40.258949279785156, 166.46572875976562, 96.77177429199219, 91.10467529296875, 44.89850997924805, 120.1212387084961, 663.6458740234375, 320.99884033203125, 74.45289611816406, 502.687255859375, 66.22045135498047, 174.30783081054688, 155.82789611816406, 578.27392578125, 37.60650634765625, -263.4429931640625, 141.61907958984375, 410.2972412109375, 3.603790283203125, 252.93992614746094, 646.1696166992188, 235.84527587890625, 358.85552978515625, 110.26549530029297, 338.195068359375, 497.4091796875, -27.53802490234375, 104.38786315917969, -53.47686004638672, 397.4330749511719, -2.670604705810547, 347.6957702636719, 472.63507080078125, 315.58770751953125, 42.241111755371094, 421.77447509765625, 525.7122802734375, -416.0245361328125, 509.48309326171875, 537.0778198242188, 48.139190673828125, 435.90032958984375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000426.npy"}
|
|
{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 157.48257446289062, "std": 193.04141235351562, "min": -220.189453125, "p10": -61.17403182983397, "median": 120.97082138061523, "p90": 397.9067779541016, "max": 693.3309936523438, "pos_frac": 0.828125, "sample": [185.89031982421875, 68.51344299316406, 236.29428100585938, 377.51312255859375, 591.6982421875, 79.38555145263672, 67.6897964477539, 46.90235137939453, 285.52496337890625, 200.07080078125, -27.52117919921875, 124.66203308105469, 37.477569580078125, 62.0775032043457, 143.67030334472656, 241.07737731933594, 406.8345947265625, 604.5665283203125, 76.05121612548828, -201.102783203125, 33.97716522216797, 98.858154296875, 251.7984619140625, 204.75497436523438, 177.01876831054688, 336.3792724609375, 256.509765625, 693.3309936523438, 24.682632446289062, 139.81185913085938, -45.749107360839844, 397.6619873046875, 309.62823486328125, -73.37439727783203, 21.911113739013672, 246.03787231445312, 117.27960968017578, -16.982479095458984, 411.44244384765625, 112.7866439819336, 270.19305419921875, 151.37220764160156, 59.938682556152344, 175.5731658935547, -220.189453125, 106.44928741455078, 398.0116882324219, 321.43743896484375, -213.05548095703125, -132.59100341796875, 377.71240234375, 104.302001953125, 253.44583129882812, 28.718482971191406, 559.9759521484375, 19.7855224609375, 38.44688415527344, 224.78335571289062, 320.1385192871094, 58.69768524169922, -34.004844665527344, -67.78471374511719, 72.44181060791016, -99.95429992675781], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000427.npy"}
|
|
{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 190.41232299804688, "std": 253.84678649902344, "min": -490.87567138671875, "p10": -86.12574844360351, "median": 180.5991668701172, "p90": 542.0988891601562, "max": 673.436279296875, "pos_frac": 0.796875, "sample": [-93.01670837402344, -19.427730560302734, 80.93148040771484, 314.1778869628906, 673.436279296875, 291.7717590332031, 86.87629699707031, 498.20654296875, 401.46478271484375, 649.4345092773438, -71.29769897460938, 45.80908203125, 405.2205505371094, 80.08377075195312, 446.4066162109375, -98.1273193359375, -462.1492004394531, 542.9647216796875, 20.366775512695312, 447.9331970214844, 404.2830810546875, 183.89483642578125, -57.194183349609375, 7.188320159912109, 400.0337829589844, 556.6516723632812, 277.10174560546875, 255.8382568359375, -92.16531372070312, 234.63429260253906, 171.51589965820312, 237.4371337890625, 455.0351257324219, -38.78081512451172, 259.572265625, 13.594255447387695, 373.7121887207031, 343.360595703125, 21.947742462158203, 75.48199462890625, 112.74612426757812, 120.13299560546875, 16.30901336669922, -80.73682403564453, -310.1033630371094, 23.244033813476562, 29.980667114257812, 177.30349731445312, 621.2356567382812, 158.80435180664062, 215.5543670654297, 317.15234375, 372.57513427734375, -490.87567138671875, 384.7330627441406, 231.88124084472656, 540.07861328125, 602.3099975585938, 265.481201171875, 58.27610397338867, 552.6558837890625, -88.43528747558594, 86.48939514160156, -54.612281799316406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000428.npy"}
|
|
{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 167.86141967773438, "std": 235.05101013183594, "min": -369.7295227050781, "p10": -137.64381408691403, "median": 184.76275634765625, "p90": 438.98895263671886, "max": 693.423828125, "pos_frac": 0.796875, "sample": [166.18954467773438, 154.97784423828125, -225.06092834472656, 257.8642578125, -38.845375061035156, 408.4863586425781, -84.91362762451172, 72.00789642333984, 192.63165283203125, 238.5702362060547, -106.93876647949219, 225.74484252929688, 21.781761169433594, 216.71055603027344, -144.21070861816406, 296.0437927246094, 98.86367797851562, -122.32106018066406, 520.285888671875, -168.9206085205078, 652.6047973632812, 693.423828125, 25.09161376953125, 338.4734802246094, 227.7364044189453, 16.115421295166016, 294.39947509765625, 13.776561737060547, 526.4593505859375, 350.3096923828125, 121.17464447021484, 210.1112518310547, 374.8849792480469, -359.130859375, 51.669578552246094, -190.41184997558594, -49.29259490966797, 204.2161102294922, 235.24339294433594, 50.11713409423828, 351.5292663574219, 343.50054931640625, 176.89385986328125, 23.567535400390625, 660.2855834960938, 151.66265869140625, 452.0614929199219, -61.83294677734375, 66.42247009277344, -265.0711364746094, 240.46237182617188, 149.97023010253906, 376.2718505859375, 303.10272216796875, 22.560232162475586, -369.7295227050781, 9.25146484375, 288.6084899902344, 144.0567626953125, 489.4826965332031, 288.16162109375, 375.2876281738281, 392.1037292480469, 368.602783203125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000429.npy"}
|
|
{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 167.29953002929688, "std": 205.27218627929688, "min": -211.62648010253906, "p10": -50.73220520019531, "median": 132.39063262939453, "p90": 402.38868103027346, "max": 736.9036254882812, "pos_frac": 0.828125, "sample": [288.7949523925781, 117.5888671875, 98.97138214111328, -107.12673950195312, 498.3629455566406, 309.09747314453125, -52.428558349609375, 158.80905151367188, 57.981414794921875, 329.7989807128906, 24.89822769165039, -3.8546829223632812, 33.686988830566406, 85.74837493896484, 222.1602325439453, 394.830078125, -139.46514892578125, 362.91522216796875, 65.68013000488281, 64.72782897949219, 59.72373962402344, 58.941566467285156, 163.33351135253906, 129.320556640625, 1.4805355072021484, -16.687578201293945, 405.6280822753906, 299.3923034667969, -41.66273498535156, 134.60023498535156, 139.3133087158203, -211.62648010253906, 736.9036254882812, 93.19114685058594, 161.30935668945312, 349.53662109375, 42.86370086669922, -142.94851684570312, 18.581357955932617, 185.9559783935547, 389.9425048828125, 474.7499694824219, 313.8476867675781, 130.1810302734375, 226.44943237304688, 586.41259765625, -56.74000549316406, -46.7740478515625, 344.7862854003906, 0.7291107177734375, 83.74580383300781, 4.424884796142578, 354.9018859863281, 204.74522399902344, 0.7656059265136719, 249.42214965820312, 251.31690979003906, -158.80047607421875, 303.2915344238281, 693.2089233398438, 639.5233764648438, 157.61404418945312, 41.143287658691406, 139.9540252685547], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000430.npy"}
|
|
{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 242.71795654296875, "std": 226.33639526367188, "min": -203.11782836914062, "p10": -19.40349273681639, "median": 233.62078857421875, "p90": 524.2476867675782, "max": 857.4005126953125, "pos_frac": 0.875, "sample": [-77.75660705566406, 263.6875305175781, -175.7794952392578, 356.8702087402344, 218.48794555664062, 137.01596069335938, 687.5367431640625, 139.37762451171875, 138.83212280273438, 414.2564392089844, 237.9404296875, 387.8818054199219, 22.061708450317383, -26.931137084960938, -160.33282470703125, 120.02336120605469, 54.971771240234375, -193.617919921875, 335.3167419433594, -163.7140350341797, 361.34613037109375, 495.9442138671875, 708.5802001953125, 565.415771484375, 157.4600067138672, 475.0809631347656, 358.9337158203125, 281.1151428222656, 217.6455535888672, 367.239013671875, 579.6181030273438, 277.5685729980469, 229.3011474609375, 133.4501495361328, 460.79827880859375, -203.11782836914062, 857.4005126953125, 2.6824798583984375, 207.37356567382812, 3.5620193481445312, 437.0834045410156, 251.38478088378906, 565.697509765625, 1.2290496826171875, 64.10060119628906, 139.74403381347656, 331.89959716796875, 73.37598419189453, 183.98602294921875, 244.21543884277344, 449.1163635253906, 418.45806884765625, 397.369384765625, 319.63653564453125, 417.0060119628906, 367.9949951171875, 160.78573608398438, 109.9172134399414, -1.8389892578125, 177.24105834960938, 536.3777465820312, 196.85650634765625, 81.12057495117188, 357.6651611328125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000431.npy"}
|
|
{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 219.70596313476562, "std": 219.6495361328125, "min": -204.29469299316406, "p10": -18.61580162048338, "median": 161.96326446533203, "p90": 480.4013366699219, "max": 1062.234619140625, "pos_frac": 0.859375, "sample": [65.59620666503906, 125.43032836914062, 201.94691467285156, 237.57907104492188, 144.10520935058594, -28.931838989257812, 57.718421936035156, 129.2074737548828, 214.104248046875, -0.4721527099609375, 3.0160865783691406, 9.45943832397461, 468.9770202636719, 387.9700927734375, -25.881153106689453, 1062.234619140625, 445.7621154785156, 265.2670593261719, 68.74125671386719, 243.99325561523438, 53.490840911865234, 27.969146728515625, 362.5057067871094, 14.673431396484375, 360.46258544921875, 325.5993957519531, 368.231689453125, 68.94609069824219, 302.4708251953125, 157.075927734375, 236.85894775390625, -1.6633148193359375, -105.52781677246094, 107.57987976074219, -79.75344848632812, 480.4019775390625, 122.31468200683594, -204.29469299316406, 301.9640808105469, 85.49174499511719, 143.50445556640625, -81.19561767578125, 349.675537109375, 301.467041015625, 266.69598388671875, 377.92169189453125, 420.1720275878906, 390.302978515625, 329.97821044921875, 141.1878204345703, 115.42997741699219, 483.6011657714844, 65.38861083984375, 478.605712890625, 130.780029296875, 158.7465362548828, 611.2930297851562, 165.17999267578125, 480.39984130859375, 541.3077392578125, -94.18692779541016, 540.783447265625, 71.53321075439453, 611.9869384765625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000432.npy"}
|
|
{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 158.056396484375, "std": 226.9610137939453, "min": -556.7279663085938, "p10": -125.74440002441406, "median": 148.2198257446289, "p90": 408.9620758056641, "max": 547.7822265625, "pos_frac": 0.765625, "sample": [23.677947998046875, 389.40362548828125, 409.4033508300781, 278.23565673828125, 385.0569763183594, -158.3119659423828, 269.2425842285156, -126.84532165527344, 339.2190856933594, 407.93243408203125, -556.7279663085938, -3.4247283935546875, 503.5299987792969, 329.9670715332031, 11.5562744140625, 106.96058654785156, 268.69403076171875, 265.0152893066406, 501.61505126953125, -401.072265625, -18.1541748046875, 376.4957275390625, 379.21160888671875, 313.9573974609375, 134.40951538085938, 208.8158721923828, 59.87794494628906, 105.0180435180664, 87.05714416503906, 381.7974548339844, 0.65863037109375, 484.0175476074219, -134.86984252929688, 212.072265625, 118.70396423339844, 401.8021545410156, 378.9298095703125, -156.1072998046875, 223.30752563476562, 5.190620422363281, 547.7822265625, 442.0043640136719, 200.89071655273438, -116.7127914428711, -53.0152587890625, 85.39315032958984, 308.5659484863281, 55.59223175048828, -94.65156555175781, -121.2518310546875, 524.0440063476562, 237.07107543945312, 114.29843139648438, -123.17558288574219, 146.72268676757812, -140.05784606933594, 56.55579376220703, 321.58758544921875, -18.384580612182617, 78.9144515991211, 262.069091796875, 336.33514404296875, 109.99539947509766, 149.7169647216797], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000433.npy"}
|
|
{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 171.66668701171875, "std": 246.3345489501953, "min": -187.5714111328125, "p10": -37.63843994140625, "median": 105.27383804321289, "p90": 452.99795532226574, "max": 1405.908935546875, "pos_frac": 0.734375, "sample": [370.4912109375, 215.4813690185547, 1405.908935546875, 67.63523864746094, -175.7021026611328, -7.427947998046875, 134.01385498046875, 346.64642333984375, -1.160634994506836, -39.39704895019531, 233.80662536621094, -33.53501892089844, 78.3979721069336, 505.3119812011719, -47.3509635925293, 69.30227661132812, -30.95836639404297, -187.5714111328125, 297.4706115722656, 530.3836669921875, 464.4228210449219, -28.54663848876953, 218.95448303222656, 94.8337631225586, 115.93255615234375, 219.02960205078125, -16.690664291381836, 55.875274658203125, 141.6281280517578, 157.68649291992188, 93.31137084960938, 290.2453918457031, 305.11199951171875, -135.06004333496094, 53.57453918457031, 419.1441650390625, 45.78510665893555, -0.4018230438232422, 156.78396606445312, 99.93771362304688, 16.433868408203125, 386.7804870605469, 424.4531555175781, 47.07121276855469, 168.76812744140625, 252.64688110351562, 318.95941162109375, 96.53147888183594, 110.6099624633789, 511.46087646484375, 234.5521697998047, -33.083740234375, 499.63385009765625, 18.586841583251953, 53.66120910644531, -9.448638916015625, 426.3399353027344, 139.69985961914062, 78.26242065429688, -114.08228302001953, 633.5110473632812, 380.5464782714844, -12.016372680664062, -126.51524353027344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000434.npy"}
|
|
{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 198.85479736328125, "std": 227.63307189941406, "min": -363.1865234375, "p10": -27.057739639282207, "median": 183.33132934570312, "p90": 499.51868286132833, "max": 851.87744140625, "pos_frac": 0.828125, "sample": [851.87744140625, 221.54864501953125, -322.2852783203125, 281.3424377441406, 408.87335205078125, 239.11184692382812, 200.29608154296875, 415.83551025390625, 291.657958984375, 172.6463623046875, 418.65423583984375, 51.58659362792969, 42.05055236816406, 277.7806396484375, -35.48545837402344, 453.015380859375, 667.8582153320312, 521.4563598632812, 240.39035034179688, -4.3954315185546875, -2.232086181640625, 17.54931640625, 18.367599487304688, 135.94667053222656, -126.87078857421875, 120.55430603027344, 113.52919006347656, 225.33554077148438, -163.89105224609375, 187.29522705078125, 242.26434326171875, -48.91160583496094, 156.350830078125, 140.31936645507812, 358.19342041015625, 345.79443359375, 345.382080078125, -363.1865234375, 282.29833984375, -217.03839111328125, 133.19937133789062, 131.85586547851562, 573.30810546875, 25.217391967773438, 179.367431640625, 170.94183349609375, 519.4486694335938, 405.24017333984375, -7.393062591552734, -1.1278095245361328, 639.614990234375, 40.76195526123047, 195.45172119140625, 58.010467529296875, 258.7296447753906, 582.81201171875, 448.0845642089844, 149.01327514648438, 140.4571075439453, 318.12091064453125, 65.36001586914062, 47.66706848144531, 270.28826904296875, 221.41171264648438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000435.npy"}
|
|
{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 195.33963012695312, "std": 213.54103088378906, "min": -217.95419311523438, "p10": -99.07343521118165, "median": 168.6217041015625, "p90": 493.5233215332033, "max": 779.9552001953125, "pos_frac": 0.84375, "sample": [-98.0545654296875, 141.7760467529297, -103.80645751953125, 152.82675170898438, 404.0838317871094, 182.6299591064453, 216.37387084960938, 644.6146850585938, 312.0174560546875, -132.75607299804688, 121.73390197753906, 691.3118896484375, 43.400726318359375, -99.51009368896484, 241.5564727783203, 162.07205200195312, -73.63471984863281, 52.7900390625, 588.7882690429688, -108.66693115234375, 340.58050537109375, 238.26626586914062, 290.6947021484375, 313.052734375, 508.5769348144531, 221.42123413085938, 377.1903381347656, 458.3982238769531, 77.74125671386719, 142.71058654785156, 779.9552001953125, 68.80835723876953, 526.5218505859375, 264.5101318359375, 224.33660888671875, 121.1033935546875, 93.05229187011719, 20.785377502441406, 206.58377075195312, 97.21929931640625, 352.77423095703125, 67.35922241210938, 154.16844177246094, 126.23841094970703, 225.8097686767578, 281.1925048828125, 143.2141876220703, -110.73299407958984, 175.17135620117188, 121.53094482421875, 217.95632934570312, 139.52001953125, 238.36737060546875, 398.62347412109375, 667.7095336914062, 28.933456420898438, 80.72293853759766, 36.03901672363281, -217.95419311523438, 194.40370178222656, -99.70317840576172, 198.59214782714844, 431.3919982910156, -60.648406982421875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000436.npy"}
|
|
{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 178.64273071289062, "std": 196.41163635253906, "min": -221.06732177734375, "p10": -60.99181671142578, "median": 170.47193145751953, "p90": 430.2403198242188, "max": 714.3364868164062, "pos_frac": 0.796875, "sample": [138.7025909423828, -176.29306030273438, 196.81752014160156, 276.2460632324219, 247.01760864257812, -94.4450454711914, 430.92626953125, 118.2684326171875, 238.5748748779297, -15.551605224609375, -221.06732177734375, 127.13214111328125, -135.99722290039062, 398.0871276855469, 50.645294189453125, 97.80659484863281, 223.52301025390625, -139.72723388671875, 82.556396484375, 452.1180114746094, 42.895652770996094, 370.62060546875, 165.05908203125, 147.30758666992188, -62.9342041015625, 549.7230224609375, 115.61190032958984, 76.97394561767578, 282.9140625, 277.79925537109375, 627.9505004882812, 281.45428466796875, 595.1873779296875, -115.03999328613281, 714.3364868164062, -56.45957946777344, -21.99812889099121, -48.82334899902344, 250.25759887695312, 141.72116088867188, 93.64949798583984, 103.26565551757812, 177.89772033691406, 231.9559326171875, 344.8316650390625, 104.53622436523438, 428.6397705078125, 328.5595397949219, 354.06512451171875, 312.4610290527344, 75.81208801269531, 275.0726013183594, 237.8573760986328, -12.603302001953125, 41.91791534423828, 446.83233642578125, 232.32501220703125, 175.88478088378906, -6.5274200439453125, 202.2696075439453, 84.86041259765625, 329.4305725097656, 204.17770385742188, 36.065406799316406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000437.npy"}
|
|
{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 216.19813537597656, "std": 199.6256103515625, "min": -181.40298461914062, "p10": 5.610543060302745, "median": 192.77792358398438, "p90": 527.6327301025392, "max": 642.2778930664062, "pos_frac": 0.90625, "sample": [1.1180744171142578, 117.15143585205078, 311.6438293457031, -89.61634826660156, 119.11560821533203, 95.53311157226562, 623.3602905273438, 16.09296989440918, 159.30258178710938, 28.53515625, 246.31588745117188, 259.3432922363281, 47.184261322021484, 86.16167449951172, 354.13507080078125, 385.36767578125, 236.78028869628906, 556.491943359375, 147.4233856201172, 220.4403076171875, 123.63524627685547, 536.1981201171875, 341.05914306640625, 538.9771728515625, 118.84500122070312, 147.85238647460938, 66.60318756103516, 329.6819152832031, -181.40298461914062, 32.374969482421875, 21.142911911010742, 18.29771614074707, 306.5049133300781, 404.65545654296875, 507.6468200683594, 235.28302001953125, 199.29513549804688, -130.525390625, 386.93487548828125, 269.3870849609375, 587.3135986328125, 156.902587890625, 36.997196197509766, 642.2778930664062, -43.992095947265625, 109.02528381347656, 498.845703125, 312.60723876953125, 318.8389892578125, 250.7823486328125, 90.96804809570312, 243.21194458007812, 542.2033081054688, 78.04005432128906, -60.78411865234375, 179.43511962890625, 186.26071166992188, -162.96571350097656, 24.73809051513672, 415.5306396484375, 175.3618927001953, 499.7205505371094, 210.98081970214844, 350.05438232421875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000438.npy"}
|
|
{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 175.6580810546875, "std": 226.91339111328125, "min": -193.12513732910156, "p10": -115.76691818237303, "median": 141.59404754638672, "p90": 508.6893432617188, "max": 729.6695556640625, "pos_frac": 0.765625, "sample": [168.74569702148438, 607.1765747070312, 34.365631103515625, 291.7748107910156, 9.808929443359375, -101.06011962890625, 462.6236267089844, 101.68647766113281, 168.43283081054688, 99.92554473876953, -182.33111572265625, 518.8856811523438, 48.27326965332031, -46.06499481201172, 18.097267150878906, 174.75765991210938, 266.21435546875, 353.3868713378906, 338.1956481933594, 147.48362731933594, 137.8616180419922, 729.6695556640625, 248.44064331054688, -8.654327392578125, 118.8303451538086, -138.30804443359375, 106.18862915039062, 40.26963424682617, -2.1489124298095703, 24.134389877319336, 317.27978515625, -122.06983184814453, 346.00836181640625, 322.4427795410156, 715.929443359375, 225.087158203125, 425.0590515136719, 280.4685363769531, 75.48838806152344, 164.0228271484375, 123.99224090576172, -143.35293579101562, -193.12513732910156, -18.94635009765625, 159.16293334960938, 141.0323486328125, 153.20465087890625, 237.79940795898438, -160.832763671875, 484.89788818359375, 658.6165161132812, 55.83840560913086, 142.15574645996094, 554.034912109375, 117.5110855102539, -1.0308055877685547, 387.2154235839844, 273.2493591308594, -19.84347915649414, 57.35343933105469, 668.6677856445312, 232.916259765625, -149.01092529296875, -5.766277313232422], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000439.npy"}
|
|
{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 175.279052734375, "std": 231.95199584960938, "min": -267.97271728515625, "p10": -71.04639663696288, "median": 106.88148498535156, "p90": 530.1604797363282, "max": 757.1653442382812, "pos_frac": 0.765625, "sample": [378.2406311035156, 162.161865234375, 561.9560546875, 312.82635498046875, -88.860107421875, 59.84368133544922, 422.04949951171875, 5.075811386108398, 80.13389587402344, 616.8961181640625, 44.50218963623047, 70.7767333984375, 404.08099365234375, 119.33135986328125, 539.4354858398438, -79.94514465332031, 94.22637939453125, -254.0399932861328, 46.736480712890625, 10.921455383300781, 94.43161010742188, 435.1085510253906, 66.17823791503906, 36.624595642089844, 154.9464874267578, 155.9193572998047, -93.43151092529297, 260.655517578125, 303.19952392578125, 88.85443115234375, 178.90670776367188, 434.0312805175781, -63.211883544921875, 207.5905303955078, -22.352659225463867, -34.413116455078125, 639.1412963867188, -267.97271728515625, 195.7037353515625, -80.99703216552734, 508.518798828125, 132.535888671875, 728.9332275390625, 340.04022216796875, 155.3560333251953, 236.95599365234375, -3.69415283203125, -22.651260375976562, 353.9587097167969, 366.82806396484375, 178.7032470703125, 544.9157104492188, -8.968238830566406, 44.97583770751953, 33.61404037475586, 456.66217041015625, 12.888561248779297, -37.890647888183594, -30.59870147705078, 52.907867431640625, 757.1653442382812, 283.6671447753906, -74.40404510498047, 12.1761474609375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000440.npy"}
|
|
{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 204.75228881835938, "std": 273.9400939941406, "min": -480.70318603515625, "p10": -27.004932403564446, "median": 165.53504943847656, "p90": 525.1487243652344, "max": 1263.4884033203125, "pos_frac": 0.796875, "sample": [220.4925079345703, 86.82962036132812, 162.4998779296875, 387.826416015625, 313.577880859375, 614.4627685546875, -17.030364990234375, 33.57823181152344, 245.01263427734375, -359.83648681640625, -75.49882507324219, -6.361394882202148, 526.6295166015625, 695.1182861328125, 339.08380126953125, 42.064537048339844, 39.46882629394531, 498.61981201171875, 90.49459838867188, 48.485260009765625, 29.705507278442383, 110.02647399902344, 168.57022094726562, 579.297119140625, 653.9472045898438, 102.88522338867188, 145.73741149902344, 368.4881286621094, 289.3634033203125, 1263.4884033203125, 20.067846298217773, -299.6514587402344, 296.68487548828125, 461.21832275390625, -198.5643768310547, 10.022340774536133, 121.57901763916016, 275.8163146972656, 521.6935424804688, -29.50030517578125, -3.29132080078125, 288.6707763671875, -19.262710571289062, -480.70318603515625, 389.3421630859375, 235.44020080566406, 316.0994873046875, 464.5586853027344, 279.79925537109375, 5.153964996337891, 78.79304504394531, -79.9090347290039, -13.96600341796875, 352.44915771484375, 302.65234375, 140.73211669921875, 414.37738037109375, 250.95631408691406, 318.2725524902344, 578.6640014648438, 7.429958343505859, -21.182395935058594, 153.18948364257812, 369.4873046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000441.npy"}
|
|
{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 228.81829833984375, "std": 236.02589416503906, "min": -214.49232482910156, "p10": -19.735064125061015, "median": 199.12677764892578, "p90": 547.6051940917969, "max": 813.9794921875, "pos_frac": 0.890625, "sample": [94.26969146728516, 146.46920776367188, 480.40667724609375, 83.7974853515625, 302.8591003417969, 112.5301513671875, -44.65782928466797, 166.14869689941406, 57.08131408691406, 97.41851806640625, 121.86772155761719, 341.1001281738281, 30.611595153808594, 275.385986328125, 273.9708251953125, 530.2603149414062, 629.4605712890625, 225.8126678466797, 9.579498291015625, 203.3413543701172, 99.97232818603516, 430.30047607421875, 9.27490234375, 265.00701904296875, -28.85274314880371, 188.53240966796875, 525.625244140625, 220.63612365722656, 667.8182373046875, 194.91220092773438, 131.24563598632812, -214.49232482910156, 260.27392578125, 82.98915100097656, 14.778648376464844, 423.16827392578125, -33.53726577758789, 421.0810241699219, 126.18773651123047, 765.5638427734375, -178.33743286132812, 340.1782531738281, 536.6679077148438, 19.472997665405273, 23.2840576171875, 692.742919921875, 552.2926025390625, 226.67361450195312, 399.7566223144531, 119.6532211303711, 27.40325927734375, -139.17401123046875, 1.539520263671875, 218.3703155517578, 17.443862915039062, -106.22639465332031, 436.04425048828125, 276.3606262207031, 813.9794921875, 302.3789367675781, 716.7374267578125, 357.3241882324219, 94.06233215332031, 207.5433807373047], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000442.npy"}
|
|
{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 153.0765838623047, "std": 254.00697326660156, "min": -465.20281982421875, "p10": -122.66063537597655, "median": 133.29341888427734, "p90": 421.1488311767579, "max": 885.4666748046875, "pos_frac": 0.765625, "sample": [885.4666748046875, 67.89572143554688, 576.4756469726562, 13.309371948242188, 241.02182006835938, 104.36073303222656, 45.642173767089844, -81.83363342285156, -63.51698684692383, 130.51014709472656, -25.119644165039062, -26.353652954101562, 211.44932556152344, 235.36257934570312, 53.479736328125, -455.7334899902344, 199.0142364501953, -126.03623962402344, 145.32730102539062, 117.41415405273438, 879.3176879882812, 406.26129150390625, 327.1253662109375, 264.6696472167969, -262.2273254394531, -465.20281982421875, 237.13729858398438, 186.1194305419922, 453.1072082519531, -416.9735412597656, 325.68212890625, 308.3350830078125, 132.85499572753906, 132.93211364746094, 62.26810836791992, -50.78376007080078, 133.65472412109375, 379.990478515625, 305.3394470214844, 98.11103820800781, -167.5258026123047, -130.12034606933594, 353.7059631347656, -58.709468841552734, 144.70037841796875, 55.42079162597656, 293.3719482421875, 455.9216003417969, 83.90668487548828, 174.53167724609375, 402.7845458984375, 97.0255126953125, 142.1152801513672, 106.02467346191406, -114.78422546386719, 290.80999755859375, -25.52838134765625, 324.4110107421875, 164.02423095703125, 62.206146240234375, 427.5292053222656, 609.3240356445312, 111.77582550048828, 308.1255187988281], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000443.npy"}
|
|
{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 229.82940673828125, "std": 255.07591247558594, "min": -338.2934265136719, "p10": -40.79745559692381, "median": 185.71441650390625, "p90": 526.1148742675783, "max": 975.5253295898438, "pos_frac": 0.828125, "sample": [251.76478576660156, -119.25537109375, 75.85880279541016, 417.9578857421875, 56.03343963623047, 244.03155517578125, 6.576679229736328, 943.7373046875, 439.2565612792969, -116.76795959472656, -14.34646987915039, 181.93923950195312, 342.769775390625, 413.96234130859375, 477.46124267578125, 164.308349609375, 448.04119873046875, 286.01837158203125, -90.38861083984375, 38.28330993652344, -47.312767028808594, -9.436166763305664, 343.64178466796875, 723.0181884765625, 36.220550537109375, 315.24652099609375, 248.10479736328125, -23.872314453125, 167.7941131591797, -177.4708251953125, 36.91931915283203, 975.5253295898438, 281.2690124511719, 166.2529296875, 143.8164520263672, 546.9664306640625, 457.70318603515625, 360.0673828125, 408.967041015625, 189.48959350585938, -25.595062255859375, 707.1170043945312, 571.0570068359375, 64.41478729248047, 411.9221496582031, 368.61981201171875, 376.707275390625, 82.4705581665039, 203.37208557128906, 131.77862548828125, 427.98736572265625, 87.96098327636719, 133.66452026367188, 139.55584716796875, 370.2063903808594, 149.65423583984375, 25.318830490112305, 63.93925476074219, -131.75735473632812, 72.88079833984375, 632.6602783203125, -338.2934265136719, 196.50485229492188, 396.78228759765625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000444.npy"}
|
|
{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 221.20608520507812, "std": 262.16986083984375, "min": -285.27978515625, "p10": -82.56894149780273, "median": 163.01142120361328, "p90": 557.5391540527345, "max": 866.544921875, "pos_frac": 0.84375, "sample": [184.206298828125, 29.556791305541992, 55.063743591308594, 710.3780517578125, 61.79731750488281, 396.26446533203125, 289.785888671875, 55.61913299560547, 112.7742919921875, 182.39805603027344, 22.011165618896484, 519.9647827148438, -45.56980895996094, 235.42959594726562, 17.526771545410156, -285.27978515625, 134.90655517578125, 411.0080871582031, 87.43094635009766, 120.11701965332031, 281.39031982421875, 339.94317626953125, 457.10986328125, 573.6424560546875, 57.99702835083008, 143.62478637695312, 846.8468627929688, 477.1455993652344, 338.79541015625, 600.8123779296875, 513.1975708007812, 727.9046630859375, 116.01823425292969, 516.9810180664062, 55.19358825683594, -75.108642578125, 270.55963134765625, 248.85235595703125, -116.92922973632812, 463.80023193359375, 25.947227478027344, 843.9896240234375, 372.5926818847656, -120.31184387207031, 366.2269287109375, 248.53965759277344, 111.87503814697266, -167.43475341796875, 40.308631896972656, 15.65046501159668, 299.37371826171875, 43.343284606933594, 37.58564376831055, 390.7188720703125, 342.4951171875, 140.15538024902344, -13.524932861328125, -85.7662124633789, 866.544921875, -110.52012634277344, 261.31756591796875, 231.00436401367188, 122.1072998046875, -238.19564819335938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000445.npy"}
|
|
{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 233.79986572265625, "std": 216.83155822753906, "min": -257.78741455078125, "p10": -30.478708648681632, "median": 212.3108139038086, "p90": 573.1609313964846, "max": 723.6754760742188, "pos_frac": 0.859375, "sample": [-23.13408660888672, 155.8388214111328, 87.64949035644531, 635.3775634765625, 697.4215698242188, 306.6592712402344, 59.17399978637695, -257.78741455078125, 136.7899932861328, 518.4903564453125, 723.6754760742188, 270.6328125, 66.62751007080078, 493.63214111328125, 85.2267074584961, 492.5049133300781, 467.41357421875, 261.5599060058594, 134.99905395507812, 330.263671875, -33.62640380859375, 81.92984008789062, 188.7562713623047, 166.79884338378906, 176.85609436035156, -134.83363342285156, 129.31837463378906, 328.4571838378906, 357.50860595703125, 231.82034301757812, 321.7232360839844, -56.21867370605469, 104.24209594726562, 174.40147399902344, 6.09162712097168, 195.184326171875, -16.937362670898438, 415.3565673828125, 101.72491455078125, 533.7384643554688, 307.97802734375, 29.505414962768555, 381.6378173828125, 671.763671875, 217.48028564453125, 76.1634292602539, 590.0562744140625, 381.911865234375, 222.94004821777344, 347.99365234375, 225.622802734375, -51.80360412597656, -87.98388671875, 191.25668334960938, 108.03083038330078, 214.97442626953125, 95.59051513671875, 267.45294189453125, 608.7073974609375, 209.64720153808594, -59.955177307128906, 590.2269897460938, 241.6021728515625, 267.0845031738281], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000446.npy"}
|
|
{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 231.62570190429688, "std": 254.5046844482422, "min": -195.8526153564453, "p10": -38.3538101196289, "median": 184.32931518554688, "p90": 512.4538177490234, "max": 1374.043701171875, "pos_frac": 0.84375, "sample": [110.39999389648438, 135.31105041503906, 233.16647338867188, 244.0786590576172, 47.166473388671875, -119.82331848144531, 436.1759948730469, 138.56649780273438, 220.55397033691406, -34.781402587890625, -68.8074951171875, 420.16009521484375, 216.92422485351562, 198.0501251220703, 507.4048156738281, 358.58355712890625, 628.194091796875, 116.92611694335938, 1374.043701171875, 602.299072265625, 170.752197265625, 713.87548828125, 315.53314208984375, 400.5428771972656, -39.88484191894531, 159.32943725585938, 235.69277954101562, -138.69046020507812, 183.3238525390625, 51.39604187011719, -92.74789428710938, 124.51347351074219, 80.06970977783203, -79.84225463867188, 351.6518859863281, 19.32423210144043, 136.240478515625, 460.1543273925781, 57.54522705078125, 25.460105895996094, 422.0291748046875, 9.144767761230469, 135.5865936279297, -195.8526153564453, 592.7237548828125, 204.73175048828125, 363.725341796875, -8.72700309753418, 171.004638671875, 34.562156677246094, 388.92620849609375, 302.5628967285156, 293.821533203125, 185.33477783203125, 88.973876953125, 465.89495849609375, 191.04835510253906, 74.00584411621094, 622.95068359375, 514.61767578125, 170.72164916992188, 471.24334716796875, 434.2062072753906, -8.024444580078125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000447.npy"}
|
|
{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 246.0916290283203, "std": 240.18214416503906, "min": -129.9900665283203, "p10": -54.07841567993164, "median": 201.45510864257812, "p90": 512.1120819091797, "max": 895.107666015625, "pos_frac": 0.84375, "sample": [54.30638885498047, 67.2872314453125, -71.49197387695312, 145.38784790039062, 631.8339233398438, 17.30670166015625, 124.14485168457031, -54.31279754638672, 305.5863037109375, 47.522705078125, 381.7086181640625, -16.837242126464844, 563.8797607421875, 111.8292236328125, 237.36480712890625, 350.55023193359375, 176.3650665283203, 112.65440368652344, 513.5955200195312, -53.531524658203125, -110.18537902832031, 143.50106811523438, 508.6507263183594, 361.16278076171875, 219.45782470703125, 213.11593627929688, 148.68936157226562, 177.8233642578125, 466.2439880371094, 645.59130859375, 313.00213623046875, 491.678955078125, 114.54891204833984, 340.6935119628906, 895.107666015625, 34.04115295410156, 61.19349670410156, 167.47166442871094, 110.19889068603516, 257.70965576171875, 452.809326171875, 446.57171630859375, 171.0475311279297, -83.33240509033203, 859.1047973632812, 264.76544189453125, 118.2210693359375, -92.82095336914062, 254.1375732421875, 189.79428100585938, 266.7786560058594, -129.9900665283203, 872.396728515625, -85.90489959716797, 503.82965087890625, 486.6358947753906, -14.373138427734375, 241.7696533203125, 111.34113311767578, 119.10237884521484, 491.6603698730469, 466.5130920410156, 362.905029296875, 272.05462646484375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000448.npy"}
|
|
{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 222.92221069335938, "std": 310.96185302734375, "min": -561.89697265625, "p10": -83.95564270019531, "median": 165.50694274902344, "p90": 571.2506652832031, "max": 1375.86376953125, "pos_frac": 0.78125, "sample": [100.46025085449219, 66.75955963134766, 290.15667724609375, 14.395614624023438, 619.666748046875, 537.0450439453125, 116.238037109375, 0.12004661560058594, 143.82720947265625, 166.83724975585938, 620.2801513671875, -98.60188293457031, -33.089515686035156, 120.2245864868164, 568.9295654296875, 41.781917572021484, 444.99664306640625, 111.93753051757812, 90.52699279785156, -86.06867980957031, 556.6480712890625, 222.2779998779297, 504.6092834472656, 504.6935729980469, 261.4365234375, 572.2454223632812, -97.84988403320312, 36.12388610839844, 221.96124267578125, 277.6370544433594, 190.36581420898438, 159.09475708007812, -45.1966552734375, 459.3203125, 437.216064453125, 561.9779052734375, -67.33170318603516, 87.38197326660156, -170.4334716796875, -291.35504150390625, 1375.86376953125, 491.6893615722656, -274.3089599609375, 280.55230712890625, 270.08123779296875, 164.1766357421875, -51.9122314453125, -561.89697265625, -39.825443267822266, 790.4495239257812, 213.5763702392578, 753.6795654296875, 284.5560302734375, -79.02522277832031, 259.0797119140625, 162.08853149414062, 404.47662353515625, 875.9197387695312, 256.01715087890625, 34.43739318847656, -18.079116821289062, 8.75421142578125, 86.65383911132812, 362.7713928222656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000449.npy"}
|
|
{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 251.05023193359375, "std": 229.22837829589844, "min": -521.5149536132812, "p10": -0.744967269897451, "median": 238.94629669189453, "p90": 541.2119445800784, "max": 867.3226318359375, "pos_frac": 0.890625, "sample": [184.07830810546875, 478.91650390625, 577.78173828125, 390.2303161621094, 65.66943359375, 568.3096923828125, 867.3226318359375, -56.46208190917969, 256.34820556640625, -33.525108337402344, 489.99432373046875, 444.10150146484375, 483.431396484375, 152.56204223632812, 289.60003662109375, 439.88262939453125, 108.49530029296875, 121.30743408203125, 264.73583984375, 105.4140625, 240.24855041503906, 394.28497314453125, 309.81591796875, 563.162353515625, 223.5594024658203, -5.029201507568359, 112.78468322753906, 643.8683471679688, 234.89378356933594, 415.6610107421875, 116.26261901855469, 361.90936279296875, 283.54766845703125, 143.3348846435547, 294.29217529296875, 28.024738311767578, 130.31605529785156, 377.8875427246094, 190.2166748046875, 113.63825988769531, 669.9420776367188, -138.3340606689453, 195.81924438476562, 364.8088684082031, 125.36134338378906, 240.98446655273438, 25.75029754638672, 426.4455871582031, 237.64404296875, 91.58329772949219, 373.9961853027344, -24.988311767578125, 347.99261474609375, 212.43336486816406, 37.10511016845703, -521.5149536132812, 304.41876220703125, 263.71685791015625, -16.78508758544922, 210.0951385498047, 176.11984252929688, 841.0911865234375, 9.251579284667969, 243.40383911132812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000450.npy"}
|
|
{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 197.39898681640625, "std": 257.4117126464844, "min": -726.2311401367188, "p10": -91.5798583984375, "median": 197.930908203125, "p90": 474.2173492431641, "max": 678.1416015625, "pos_frac": 0.84375, "sample": [118.67593383789062, 42.572540283203125, 76.68856811523438, 218.30465698242188, 393.41314697265625, 164.33712768554688, 198.36448669433594, 456.6757507324219, 51.07103729248047, 297.45294189453125, 436.5636901855469, 626.632080078125, 457.6956787109375, 642.04736328125, 128.9002685546875, 163.99197387695312, 76.92008209228516, -726.2311401367188, -96.3609619140625, -152.98443603515625, 122.93437957763672, 78.52790069580078, 401.8569030761719, 407.2027587890625, 359.2832336425781, 197.49732971191406, 323.642822265625, 182.88597106933594, 229.7867431640625, 595.6121826171875, 114.755859375, 678.1416015625, 674.6619873046875, 184.51895141601562, 358.8997497558594, 201.58148193359375, 334.63018798828125, -81.25996398925781, 267.6880798339844, 196.3850860595703, -43.106361389160156, -5.797885894775391, 413.2288818359375, -574.1275634765625, 206.0081787109375, 179.74658203125, 208.9567108154297, 198.99874877929688, 261.1336669921875, 28.652807235717773, 263.0604248046875, -96.00267028808594, -111.48883056640625, 529.4681396484375, 452.2348327636719, 136.13143920898438, 401.9376220703125, -287.0379943847656, 162.47268676757812, 8.906671524047852, 481.2980651855469, 25.486858367919922, 243.93466186523438, 145.47500610351562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000451.npy"}
|
|
{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 230.67544555664062, "std": 214.3404083251953, "min": -291.4769592285156, "p10": -22.753715515136705, "median": 218.9239959716797, "p90": 500.0166015625, "max": 846.8362426757812, "pos_frac": 0.859375, "sample": [413.6171875, -80.5040054321289, 443.6683654785156, 304.205322265625, -50.31829071044922, 389.9391784667969, 346.3313903808594, 340.9144287109375, 326.2821350097656, -45.06261444091797, 602.8441162109375, 248.49365234375, 88.59542846679688, 45.5042724609375, 308.5133056640625, 212.95001220703125, 366.2668762207031, 294.9983825683594, 400.88458251953125, 30.84107780456543, 326.22998046875, 29.27405548095703, 435.8857421875, 152.003662109375, 212.0347137451172, 607.2493896484375, 97.31178283691406, -196.35165405273438, 377.98040771484375, 85.81549072265625, -27.833038330078125, 395.4967956542969, 94.68318176269531, 224.89797973632812, 630.3026733398438, 117.046875, 241.9932098388672, 473.95025634765625, 126.75890350341797, 148.98007202148438, 122.66275024414062, -5.345458984375, 171.09100341796875, 580.070556640625, 139.19686889648438, 357.4134521484375, 94.9271240234375, 496.4749450683594, 846.8362426757812, 171.2410430908203, 107.34504699707031, 334.5787353515625, 323.553955078125, 16.89901351928711, -291.4769592285156, 160.57354736328125, -45.073455810546875, 501.5344543457031, 260.30889892578125, 533.7521362304688, 50.76178741455078, 239.76248168945312, -10.901962280273438, 64.36748504638672], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000452.npy"}
|
|
{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 205.49830627441406, "std": 249.58705139160156, "min": -196.7469482421875, "p10": -51.60239601135254, "median": 138.25823211669922, "p90": 528.640155029297, "max": 1242.412109375, "pos_frac": 0.796875, "sample": [494.91851806640625, 468.12628173828125, -4.066337585449219, 104.92403411865234, 246.76089477539062, 133.6254425048828, 178.3759765625, 138.8046417236328, 80.7471923828125, 102.241943359375, 194.5750732421875, 255.28176879882812, 391.9509582519531, 456.92169189453125, 120.99078369140625, 101.94044494628906, 305.783203125, 233.05068969726562, -50.403717041015625, 55.73475646972656, 349.0450439453125, -107.26420593261719, 145.32806396484375, 157.1119384765625, 574.7847900390625, -180.05880737304688, 55.08149719238281, 380.8964538574219, -196.7469482421875, 303.5308532714844, -9.269180297851562, 124.48876953125, 404.16070556640625, 545.6090087890625, 594.7523803710938, 37.95732498168945, -29.708633422851562, 137.71182250976562, 1242.412109375, 685.7694091796875, 64.86093139648438, -137.6302032470703, 682.9138793945312, 11.178642272949219, 190.0988311767578, 62.705963134765625, 302.2364501953125, 69.28668975830078, 390.28656005859375, 300.09832763671875, -83.07685852050781, 347.44097900390625, -52.11611557006836, 407.5821228027344, 109.67904663085938, -95.20972442626953, -16.613632202148438, 154.98828125, 543.09228515625, 453.68914794921875, -37.37858963012695, 45.45484161376953, 93.22306823730469, 119.22412872314453], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000453.npy"}
|
|
{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 236.93299865722656, "std": 267.8375549316406, "min": -399.8970947265625, "p10": -22.307669067382804, "median": 198.27310180664062, "p90": 552.3072235107425, "max": 1094.6763916015625, "pos_frac": 0.875, "sample": [127.4283676147461, -12.81488037109375, 760.2634887695312, 79.06047058105469, 443.9924621582031, 49.67802429199219, 479.0348815917969, 264.9390869140625, 65.28994750976562, 214.7281951904297, -399.8970947265625, 373.87744140625, 258.62298583984375, 247.937744140625, 181.26309204101562, 141.6822509765625, 192.90760803222656, 266.04022216796875, -153.63400268554688, -306.2947998046875, 29.472352981567383, 477.930908203125, -44.580841064453125, 697.5634765625, 15.662559509277344, -26.376007080078125, 475.55682373046875, 70.8638687133789, 77.28468322753906, 203.90106201171875, 382.4181213378906, 84.67518615722656, 251.49383544921875, 109.15775299072266, 411.66448974609375, 155.46279907226562, 227.18389892578125, 107.45025634765625, 359.326416015625, 203.6385955810547, 370.18536376953125, 393.4031677246094, 74.73469543457031, 312.7485046386719, 21.811538696289062, 584.3697509765625, 1094.6763916015625, 293.41461181640625, 83.30631256103516, 167.70480346679688, 917.341552734375, 911.22265625, 111.0186538696289, 398.47161865234375, 110.53365325927734, 339.9996643066406, 440.0562744140625, -40.285430908203125, 292.7138366699219, -34.77708435058594, 148.0963897705078, 38.85844039916992, 583.7096557617188, 6.540363311767578], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000454.npy"}
|
|
{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 275.0581359863281, "std": 299.25799560546875, "min": -433.3590087890625, "p10": -18.424466705322246, "median": 241.96566009521484, "p90": 717.3373413085938, "max": 975.5257568359375, "pos_frac": 0.890625, "sample": [95.68952941894531, 199.95306396484375, 387.4775390625, 645.9833984375, -259.118408203125, 313.4488830566406, 535.19189453125, -44.19526672363281, 851.0595703125, -178.72007751464844, 786.2785034179688, 162.1374969482422, 708.3207397460938, -139.91082763671875, 803.8748779296875, -93.55989837646484, 276.0243835449219, 713.9517822265625, 108.4682846069336, 88.98878479003906, 1.638214111328125, 378.3564758300781, 224.63282775878906, 49.198829650878906, 285.12261962890625, 917.0587768554688, 304.83734130859375, -27.02275848388672, 95.77964782714844, 159.41197204589844, 286.3540344238281, 452.3717041015625, 184.494140625, 146.68209838867188, 113.7308349609375, 150.77487182617188, 306.420654296875, 310.2380676269531, 39.499237060546875, -433.3590087890625, 380.7019958496094, 38.82448959350586, 266.958740234375, 476.55401611328125, 29.585540771484375, 82.96761322021484, 15.403413772583008, 377.0106201171875, 718.2330932617188, 259.2984924316406, 623.7274169921875, 26.761932373046875, 260.0712890625, 477.1673278808594, 107.48682403564453, 94.53921508789062, 18.936145782470703, 715.2472534179688, 287.449462890625, 278.6434020996094, 871.8787841796875, 157.240234375, 155.9430389404297, 975.5257568359375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000455.npy"}
|
|
{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 266.81292724609375, "std": 236.5420379638672, "min": -147.58364868164062, "p10": 15.617835426330567, "median": 202.9878692626953, "p90": 675.0650268554688, "max": 759.363037109375, "pos_frac": 0.953125, "sample": [146.76547241210938, 85.67991638183594, 644.516357421875, 162.0174560546875, 7.9273681640625, 259.63916015625, 24.328182220458984, 81.46096801757812, 200.72393798828125, 537.1061401367188, 353.88299560546875, 11.587800979614258, 170.3183135986328, 696.3997802734375, 216.94448852539062, 56.99291229248047, 76.63993835449219, 416.0839538574219, 240.09339904785156, 319.4471740722656, 171.00038146972656, 50.18788146972656, 16.60321044921875, 748.3809814453125, -79.00146484375, 357.5265808105469, 732.0775146484375, 327.1924133300781, 487.07568359375, 59.851409912109375, 202.1104736328125, 438.5762023925781, 528.766357421875, 229.43594360351562, 101.64864349365234, 45.014129638671875, 5.337053298950195, 355.65350341796875, 698.3214721679688, 62.0028076171875, 681.1669311523438, 328.2160339355469, 263.0662841796875, 97.22930145263672, -147.58364868164062, 151.03009033203125, 15.195531845092773, 31.744049072265625, 679.3907470703125, 167.01834106445312, 759.363037109375, 609.9560546875, 203.86526489257812, -4.629180908203125, 87.96408081054688, 103.78239440917969, 305.6950378417969, 117.26019287109375, 324.66815185546875, 179.503173828125, 386.1099548339844, 583.0347900390625, 664.9716796875, 241.69300842285156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000456.npy"}
|
|
{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 257.47955322265625, "std": 237.2364044189453, "min": -255.85252380371094, "p10": -56.17950706481931, "median": 295.35601806640625, "p90": 551.4568725585938, "max": 838.1109619140625, "pos_frac": 0.84375, "sample": [14.947822570800781, 383.4229736328125, 354.3431396484375, 296.74285888671875, 377.6986083984375, 339.3813171386719, 490.986083984375, 435.3406982421875, 179.138916015625, 51.354736328125, 838.1109619140625, 460.8758544921875, 96.88864135742188, 62.3317985534668, 132.2217559814453, 217.2944793701172, 440.3136291503906, 142.80250549316406, 429.6309509277344, 463.72845458984375, 607.185546875, -7.409114837646484, 229.58399963378906, 405.36572265625, 331.97991943359375, 602.6060791015625, -250.45030212402344, -93.91392517089844, 551.357666015625, 541.4949340820312, 123.23948669433594, 352.1350402832031, 178.6470184326172, 381.3761901855469, 293.96917724609375, -179.83473205566406, 551.4993896484375, 364.9527893066406, 125.08114624023438, -66.29238891601562, 262.05560302734375, 585.4681396484375, 309.962646484375, -32.58278274536133, -16.451915740966797, 5.8478546142578125, 459.7961120605469, 52.41443634033203, 339.63006591796875, 338.0383605957031, -99.04314422607422, 347.79302978515625, 158.29917907714844, 675.8453369140625, -255.85252380371094, 469.947021484375, 169.6919403076172, -115.91497039794922, 281.0370788574219, 73.16493225097656, 140.738525390625, 373.44091796875, 80.98168182373047, 624.25390625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000457.npy"}
|
|
{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 261.27423095703125, "std": 263.63287353515625, "min": -230.21701049804688, "p10": -89.43376388549804, "median": 280.8219299316406, "p90": 472.6646636962891, "max": 1233.3421630859375, "pos_frac": 0.796875, "sample": [-125.54621887207031, 350.97393798828125, 417.72125244140625, 272.32769775390625, 214.44627380371094, -23.452743530273438, 1233.3421630859375, -140.36973571777344, 346.19781494140625, 341.6653137207031, 138.17526245117188, 251.9541015625, 420.9434814453125, 326.73095703125, 607.7257080078125, -100.48529052734375, -90.14228820800781, -14.434686660766602, 216.32440185546875, 354.80615234375, 443.2995910644531, 412.3269348144531, 71.20861053466797, 478.22210693359375, 288.47552490234375, 300.61358642578125, 219.98382568359375, 164.3997802734375, 273.1683349609375, 306.4588317871094, 355.343994140625, 762.2919921875, 447.6393737792969, 374.7109375, 265.2379455566406, -39.4541015625, 324.8877258300781, 60.384429931640625, 515.8802490234375, 159.27801513671875, 585.3160400390625, 303.9392395019531, -87.7805404663086, -114.6570816040039, 187.61151123046875, -230.21701049804688, 369.16986083984375, -2.7896652221679688, 150.33212280273438, 1129.9862060546875, 289.9559326171875, 319.8448791503906, 259.08935546875, 99.46922302246094, 138.75616455078125, 459.6972961425781, 336.2556457519531, 365.15533447265625, 194.6060333251953, 325.994384765625, -62.759376525878906, 413.7110290527344, -163.84896850585938, 271.45001220703125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000458.npy"}
|
|
{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 244.49298095703125, "std": 271.5580749511719, "min": -497.1595458984375, "p10": -65.89676208496093, "median": 213.81085968017578, "p90": 607.705584716797, "max": 989.2587280273438, "pos_frac": 0.859375, "sample": [56.44880676269531, 25.16070556640625, 214.31585693359375, 255.5755615234375, 131.5159912109375, 243.45333862304688, -155.55836486816406, 102.81382751464844, 385.97540283203125, 130.67369079589844, 416.1072692871094, 300.4249572753906, 432.9169616699219, 390.6270751953125, -23.93181037902832, 272.6077880859375, 14.066864013671875, 395.2892761230469, 111.07494354248047, 677.0774536132812, 45.475250244140625, 703.2327880859375, 567.799072265625, -92.14341735839844, 285.9277038574219, -59.53959655761719, 399.4232177734375, -68.62126159667969, 117.37678527832031, 62.36634063720703, 142.2420196533203, 90.2854995727539, 648.8939208984375, -497.1595458984375, 989.2587280273438, 247.78038024902344, 908.507568359375, 374.81646728515625, 129.5108642578125, -106.921630859375, 142.3828125, 176.05206298828125, 236.84075927734375, 86.133056640625, 0.51953125, 265.295166015625, -183.48245239257812, 178.49221801757812, 213.3058624267578, 506.7108154296875, 621.7990112304688, 118.16975402832031, 164.8460693359375, 820.3200073242188, 259.0655517578125, 209.14337158203125, 459.4166564941406, -119.29532623291016, 527.1630249023438, 574.8209228515625, 381.5533752441406, 259.1024169921875, 25.30081558227539, 458.7471923828125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000459.npy"}
|
|
{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 246.62152099609375, "std": 267.0519104003906, "min": -281.0988464355469, "p10": -90.05415267944333, "median": 241.80146026611328, "p90": 604.1376342773439, "max": 971.1099243164062, "pos_frac": 0.796875, "sample": [664.9014282226562, 455.0509033203125, 186.74099731445312, -35.990753173828125, 84.37855529785156, 540.46484375, 650.8934326171875, 294.6412353515625, -281.0988464355469, 214.66844177246094, -23.28978729248047, 522.310302734375, 158.15965270996094, 556.892578125, 139.96876525878906, 113.19085693359375, 237.16226196289062, -224.52626037597656, -28.650375366210938, 971.1099243164062, 649.5550537109375, 224.35284423828125, 290.4495544433594, 121.91815185546875, 731.0357666015625, 292.07763671875, 770.4798583984375, 268.39056396484375, 24.797401428222656, 23.64654541015625, 223.53067016601562, -244.79232788085938, 530.4521484375, -99.92460632324219, 325.8755187988281, 616.20703125, -67.0230941772461, 424.4841003417969, 290.3586120605469, -18.94182586669922, 406.921875, 402.24212646484375, 158.1031951904297, 264.7976379394531, 385.86676025390625, 71.95303344726562, 141.1633758544922, 168.3446807861328, 246.44065856933594, 268.1300048828125, -181.21873474121094, -50.752174377441406, 93.09880828857422, 258.8785095214844, -115.45063781738281, 264.1468811035156, 493.2066650390625, 191.6129150390625, -203.37338256835938, 575.9757080078125, 410.27398681640625, 334.78936767578125, 153.4815673828125, 471.237548828125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000460.npy"}
|
|
{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 206.36936950683594, "std": 267.1855773925781, "min": -355.7235107421875, "p10": -83.6949691772461, "median": 155.37054443359375, "p90": 546.6864349365237, "max": 941.9738159179688, "pos_frac": 0.765625, "sample": [95.13735961914062, -80.57997131347656, 336.61676025390625, 203.013427734375, -20.35651397705078, 479.4057922363281, -355.7235107421875, 721.5781860351562, 56.118316650390625, 355.45379638671875, 101.8097152709961, -100.4622573852539, 104.1988525390625, 357.2303771972656, 744.256103515625, 373.163818359375, 609.0664672851562, -0.2151470184326172, 146.0027313232422, 236.0154571533203, 135.9977569580078, 575.52099609375, 382.7650146484375, 824.125732421875, -75.78565979003906, 5.9688568115234375, 825.4627075195312, 941.9738159179688, 79.78439331054688, 197.6845703125, 387.8628234863281, 156.4748077392578, 273.2291259765625, 15.142303466796875, 350.189208984375, 154.2662811279297, -177.93560791015625, -27.813323974609375, -85.02996826171875, -88.39901733398438, 420.89910888671875, -31.34168243408203, 395.6396484375, 433.4537658691406, -219.6719207763672, 283.4179382324219, 159.02891540527344, 440.7698974609375, 17.189056396484375, 65.34576416015625, 422.1631164550781, 90.9166488647461, 120.58039855957031, 140.91009521484375, 361.5079650878906, -64.86450958251953, -1.840423583984375, 224.6181640625, 172.2669677734375, 154.0303955078125, 55.91289520263672, -244.9847869873047, 323.72589111328125, 274.75189208984375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000461.npy"}
|
|
{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 175.31207275390625, "std": 239.1533966064453, "min": -346.9539794921875, "p10": -99.11108398437497, "median": 154.28439331054688, "p90": 421.6520538330078, "max": 842.8453979492188, "pos_frac": 0.734375, "sample": [292.42633056640625, -32.707359313964844, -37.434532165527344, -112.36329650878906, 375.3961486816406, 351.59722900390625, 32.73577880859375, 399.8094177246094, 318.74798583984375, 360.29473876953125, 290.1376953125, -222.0462646484375, -178.5772247314453, 8.043270111083984, 136.76388549804688, 337.9287109375, 499.85186767578125, 179.05349731445312, -50.21600341796875, 419.3550720214844, -141.19387817382812, 178.15240478515625, 328.29296875, -346.9539794921875, -57.07938766479492, 48.534332275390625, 422.636474609375, -181.5222930908203, 38.6362190246582, 83.52299499511719, 772.0111083984375, 139.1705322265625, -57.5770378112793, 258.12591552734375, 402.389404296875, 842.8453979492188, -24.05634117126465, -64.3713150024414, 598.2073974609375, 272.66937255859375, 83.68669128417969, -48.05384826660156, 231.4202117919922, 126.28699493408203, 60.73553466796875, 65.89140319824219, 287.2439270019531, 524.2476196289062, -180.03927612304688, 118.52355194091797, 59.762664794921875, 227.49267578125, 653.0748291015625, 351.52850341796875, 397.5267028808594, -10.06488037109375, 315.05523681640625, 219.70555114746094, 256.6148681640625, 302.45654296875, 97.50367736816406, 96.92764282226562, -68.18925476074219, 169.39825439453125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000462.npy"}
|
|
{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 222.03793334960938, "std": 291.1636657714844, "min": -264.5283508300781, "p10": -143.2866439819336, "median": 219.48914337158203, "p90": 622.8869812011719, "max": 938.6567993164062, "pos_frac": 0.75, "sample": [34.897926330566406, 459.1947937011719, -64.15314483642578, 47.23508071899414, 131.06640625, 332.08770751953125, -255.20028686523438, 10.966545104980469, 330.3758544921875, -145.97877502441406, 429.8692626953125, -108.84359741210938, 358.5789794921875, 469.9281921386719, 102.95086669921875, 188.19412231445312, 247.88412475585938, 399.1155090332031, 283.8183898925781, 0.8805656433105469, 112.95260620117188, 938.6567993164062, 239.68478393554688, 201.66937255859375, 602.0593872070312, 457.35406494140625, 58.08761978149414, 51.819557189941406, -121.51828002929688, 249.22967529296875, -185.08169555664062, -0.8428115844726562, -264.5283508300781, -137.0050048828125, 678.5963745117188, -194.05453491210938, -31.241409301757812, 121.39404296875, 295.3818664550781, -89.39067077636719, 471.12078857421875, 325.74163818359375, 628.01611328125, 339.1551208496094, 769.2681884765625, 730.9783935546875, 27.4632568359375, 263.61431884765625, 610.9190063476562, -172.1906280517578, 237.3089141845703, 839.3331909179688, 162.3383026123047, 514.901611328125, -87.04871368408203, -1.810882568359375, 264.3074951171875, 150.43460083007812, -163.59645080566406, 237.59042358398438, 372.6836853027344, 455.1705017089844, 853.1256103515625, 145.51138305664062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000463.npy"}
|
|
{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 316.3305969238281, "std": 295.2831726074219, "min": -358.4253845214844, "p10": -60.32945480346678, "median": 298.9971008300781, "p90": 620.9921386718751, "max": 1397.439697265625, "pos_frac": 0.875, "sample": [-76.21086120605469, 609.83349609375, 289.6255187988281, 112.54074096679688, 141.32952880859375, 573.2681884765625, 271.15313720703125, -67.63895416259766, 105.92178344726562, 315.2470703125, 513.167236328125, 527.9935302734375, -87.39846801757812, 595.4049072265625, 249.61068725585938, 343.7578125, 498.27191162109375, 104.77030944824219, 65.10137939453125, 598.1537475585938, 580.0198974609375, 319.596435546875, 673.3465576171875, -133.51564025878906, 771.2619018554688, 87.3539810180664, 308.3686828613281, 535.1123046875, -116.65572357177734, 146.21600341796875, 284.8437194824219, 220.10638427734375, 323.26239013671875, 62.14871597290039, 238.86837768554688, 329.62628173828125, 565.2584838867188, 308.53875732421875, 193.65484619140625, 87.54608154296875, 104.89651489257812, 124.33514404296875, 625.7744140625, 112.92668914794922, 117.72749328613281, 443.7229309082031, -129.16104125976562, 222.29318237304688, 14.646995544433594, 565.4356689453125, 560.5736083984375, 858.5879516601562, 1397.439697265625, 657.304931640625, 355.7835693359375, 497.4942932128906, 590.5121459960938, -43.273956298828125, 754.8846435546875, 557.5471801757812, 456.0241394042969, -358.4253845214844, 261.11602783203125, 28.130409240722656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000464.npy"}
|
|
{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 283.0708312988281, "std": 257.1004638671875, "min": -239.06790161132812, "p10": -46.38876457214352, "median": 270.83453369140625, "p90": 570.9812683105469, "max": 1129.265625, "pos_frac": 0.875, "sample": [531.4556884765625, 569.1388549804688, 259.2672424316406, 173.4193878173828, -154.51516723632812, 77.57992553710938, 490.89630126953125, 127.91845703125, 143.769775390625, 194.00927734375, 478.1734924316406, 174.06475830078125, 142.12066650390625, 153.76348876953125, 550.244140625, 700.9983520507812, 265.040771484375, 346.5198059082031, 309.76171875, 121.5663070678711, -149.04989624023438, 487.06622314453125, 506.10491943359375, 208.6265869140625, 1129.265625, 0.8707427978515625, 276.6282958984375, 92.5269775390625, 240.6708984375, 317.82421875, 430.1452941894531, 341.1512451171875, 38.916603088378906, 163.30430603027344, 103.01832580566406, 462.9551086425781, -12.858360290527344, 285.8514404296875, -60.75893783569336, 334.6136169433594, 571.7708740234375, 262.4178771972656, 326.9552001953125, -216.20346069335938, 416.1421813964844, 289.52825927734375, 152.7459259033203, 401.9956970214844, 58.71147537231445, 263.30633544921875, 714.8526611328125, -132.15711975097656, 773.4384765625, -239.06790161132812, 672.3074951171875, 340.36785888671875, -84.52656555175781, 254.63265991210938, 403.73529052734375, 425.8783874511719, 41.02311706542969, 395.0888671875, 635.0096435546875, 536.5142822265625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000465.npy"}
|
|
{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 219.94815063476562, "std": 259.3417663574219, "min": -496.3849182128906, "p10": -62.42949409484862, "median": 229.03195190429688, "p90": 555.6256103515627, "max": 879.1305541992188, "pos_frac": 0.78125, "sample": [274.9501037597656, -12.126724243164062, 257.33984375, -144.6187744140625, 410.4595031738281, 317.14263916015625, 317.3354187011719, 177.5955047607422, 420.45196533203125, -33.79759216308594, -4.359245300292969, 247.79354858398438, -203.549560546875, 575.3951416015625, -48.154964447021484, 247.561767578125, 187.39332580566406, -14.774364471435547, 283.86248779296875, 133.49884033203125, 323.8522644042969, 209.55722045898438, 196.10450744628906, 334.11517333984375, 582.7864990234375, 159.34140014648438, 219.56829833984375, 453.2173156738281, 275.68878173828125, 363.35675048828125, 149.49267578125, 186.73800659179688, 397.42474365234375, 687.8703002929688, -496.3849182128906, 879.1305541992188, 303.0171813964844, 677.1942138671875, 405.5986328125, 62.99494171142578, 420.019775390625, 8.63412857055664, 238.49560546875, 152.17837524414062, 476.74847412109375, 725.8535766601562, -68.54714965820312, -225.54891967773438, 37.41704559326172, 165.13156127929688, 509.4967041015625, 213.49220275878906, 262.3917236328125, 33.73540496826172, 390.6581726074219, 373.6013488769531, -380.9054870605469, 45.01233673095703, 673.1983032226562, -112.89845275878906, 297.0163879394531, -23.50292205810547, -0.5956802368164062, 105.53591918945312], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000466.npy"}
|
|
{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 243.0078125, "std": 327.18408203125, "min": -362.69866943359375, "p10": -152.3011947631836, "median": 210.6802520751953, "p90": 709.7457824707033, "max": 934.8265380859375, "pos_frac": 0.75, "sample": [406.98980712890625, 680.263427734375, 543.3526000976562, 683.5941772460938, -346.65631103515625, -335.82342529296875, 302.7626037597656, 365.1484680175781, 195.1328125, 178.21270751953125, 225.22476196289062, 361.04608154296875, 720.95361328125, 21.71772575378418, 372.54888916015625, 411.87249755859375, 70.81330108642578, 295.25537109375, 788.3343505859375, 564.4176025390625, 342.02117919921875, 59.33164596557617, 79.8868408203125, 462.6402893066406, -81.05476379394531, -206.94131469726562, -42.67008972167969, 216.83645629882812, 810.9505004882812, 293.66363525390625, 642.9249267578125, 130.9242401123047, 387.1607666015625, -107.32056427001953, 787.643798828125, -3.791015625, 934.8265380859375, -95.73486328125, 74.5906982421875, 542.5801391601562, 141.13433837890625, -362.69866943359375, 100.33667755126953, -149.78863525390625, 128.10389709472656, 234.76214599609375, -107.812744140625, -63.360870361328125, -245.361083984375, 540.51513671875, 543.382568359375, 473.81201171875, 387.5977478027344, -38.07083511352539, 839.53173828125, 176.96380615234375, 450.4528503417969, 106.08206176757812, -153.3780059814453, 819.2640991210938, -320.7475891113281, 43.11538314819336, 70.51009368896484, 204.5240478515625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000467.npy"}
|
|
{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 264.21685791015625, "std": 271.2340393066406, "min": -327.5756530761719, "p10": -43.234102058410635, "median": 223.31024932861328, "p90": 623.1481872558595, "max": 915.3363647460938, "pos_frac": 0.859375, "sample": [321.3701477050781, -30.76930809020996, 915.3363647460938, 301.34918212890625, 579.4952392578125, 548.0557861328125, 318.9205322265625, 499.4228820800781, 478.2375793457031, 332.0337829589844, 243.46034240722656, 240.2858428955078, 112.6387939453125, 204.33334350585938, 74.88534545898438, 97.1983642578125, 105.70423889160156, 25.160490036010742, 416.8092346191406, 230.31402587890625, 156.504150390625, 36.007625579833984, -13.373115539550781, 182.3212890625, 333.93328857421875, 447.1598205566406, 204.52093505859375, 802.6962280273438, 721.79638671875, 98.54843139648438, -173.32896423339844, -48.57615661621094, 449.489013671875, 278.5061950683594, 398.6059265136719, 320.5782470703125, 212.61068725585938, 93.32565307617188, 591.6158447265625, -327.5756530761719, 869.9874267578125, 75.28571319580078, 220.57957458496094, 213.67355346679688, 279.0147705078125, 214.69610595703125, 7.4063720703125, 99.38619995117188, 6.0283660888671875, 905.714599609375, 231.16122436523438, 756.944091796875, 180.15585327148438, 438.87762451171875, 154.56777954101562, 454.4337158203125, 531.9940185546875, 636.6620483398438, 226.04092407226562, -68.66241455078125, 117.56962585449219, -205.78897094726562, -146.8383026123047, -68.61981964111328], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000468.npy"}
|
|
{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 273.3966979980469, "std": 264.33392333984375, "min": -274.6695556640625, "p10": 5.097416877746584, "median": 205.6656723022461, "p90": 670.4923706054689, "max": 944.7505493164062, "pos_frac": 0.90625, "sample": [587.5969848632812, 301.0656433105469, 93.18904876708984, 214.6641387939453, 637.7311401367188, 36.743675231933594, 540.4458618164062, 221.685302734375, 65.74104309082031, 151.29779052734375, 433.6073303222656, 74.42060852050781, 164.4031219482422, 8.181365966796875, 836.0863037109375, 118.65660858154297, 458.270263671875, 530.05029296875, 325.4437255859375, 370.07318115234375, 629.9666748046875, 227.00735473632812, 125.49952697753906, 174.49571228027344, 944.7505493164062, -3.037811279296875, 439.87420654296875, 252.67364501953125, 699.5548095703125, -180.80078125, 196.03854370117188, 172.6251678466797, 433.8077087402344, 50.673484802246094, 326.993896484375, 77.96837615966797, 63.36107635498047, -70.67184448242188, 4.36456298828125, -16.637985229492188, 320.5160827636719, 196.66720581054688, 684.5328979492188, 13.000408172607422, -202.63063049316406, 191.46310424804688, 360.2462463378906, 363.28179931640625, 333.80419921875, 6.807409286499023, 499.336181640625, 701.9445190429688, 281.0072326660156, 827.9341430664062, 816.8138427734375, 466.8440856933594, 130.31399536132812, 351.41583251953125, 116.08897399902344, 176.60366821289062, 156.1561279296875, 109.68754577636719, 152.36302185058594, -274.6695556640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000469.npy"}
|
|
{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 267.87860107421875, "std": 252.1519775390625, "min": -546.5853881835938, "p10": -38.171462631225566, "median": 233.22549438476562, "p90": 594.437078857422, "max": 752.79443359375, "pos_frac": 0.875, "sample": [171.98342895507812, 333.279541015625, 110.71298217773438, 71.09647369384766, 194.24932861328125, 50.74188232421875, 249.85916137695312, 98.29894256591797, 98.39814758300781, -174.1339569091797, 404.74249267578125, 154.7860565185547, 462.9934387207031, 153.4334259033203, 462.3607482910156, 673.827880859375, 9.97303581237793, 599.2650756835938, -546.5853881835938, 400.38134765625, 84.29281616210938, 141.70343017578125, 69.11318969726562, -102.99529266357422, 113.8558349609375, 215.95236206054688, -20.210445404052734, 242.371337890625, 340.26470947265625, 210.9215087890625, 514.1597900390625, -142.5890655517578, 752.79443359375, 183.1165008544922, 57.04412078857422, 478.2869873046875, 269.211181640625, 488.12744140625, 618.5848388671875, 395.03118896484375, 583.1717529296875, 185.01882934570312, 426.0577697753906, 577.1046142578125, -61.52705383300781, 709.0918579101562, 351.8235778808594, 255.04322814941406, 490.8874816894531, 454.320068359375, 342.69671630859375, -45.869041442871094, 210.929443359375, 214.52365112304688, 493.63226318359375, 653.0010986328125, -107.26512145996094, 736.5660400390625, 459.4971008300781, 177.3783416748047, 559.9298095703125, 224.07965087890625, 103.22344207763672, 262.2146301269531], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000470.npy"}
|
|
{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 217.40028381347656, "std": 249.05921936035156, "min": -317.024169921875, "p10": -80.9323257446289, "median": 241.2394256591797, "p90": 474.33399658203143, "max": 1139.638916015625, "pos_frac": 0.796875, "sample": [572.2532348632812, -14.789939880371094, 199.52496337890625, 550.76416015625, 139.904052734375, 270.5011291503906, 219.8711700439453, 850.2381591796875, 408.81768798828125, 122.7332763671875, 136.87315368652344, 134.324951171875, 280.5211181640625, 256.7573547363281, 359.6902770996094, 31.620113372802734, -102.17074584960938, -102.94720458984375, 276.7542419433594, 277.7096252441406, 1139.638916015625, -46.49693298339844, 215.03457641601562, 400.03131103515625, 401.9485778808594, -85.23348236083984, -103.75796508789062, 72.020751953125, -22.894859313964844, 275.6115417480469, -70.89629364013672, 670.7508544921875, 251.44317626953125, 495.0965270996094, 263.2917175292969, 425.8880920410156, 278.7209167480469, 181.30799865722656, 306.7789306640625, 417.39874267578125, 162.98907470703125, 68.30587005615234, 158.7105255126953, 46.83369064331055, 269.560546875, 632.194580078125, 282.8070068359375, -283.8421630859375, 378.38336181640625, 32.38041687011719, -317.024169921875, 163.4425048828125, 21.160348892211914, 313.42529296875, 57.49653625488281, 287.44024658203125, -53.560020446777344, -26.369613647460938, 231.03567504882812, 317.4498596191406, 295.6087646484375, 395.4438171386719, -114.96624755859375, 260.07867431640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000471.npy"}
|
|
{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 264.68792724609375, "std": 315.24456787109375, "min": -276.5950927734375, "p10": -51.452735137939456, "median": 187.37596130371094, "p90": 778.5894104003908, "max": 1175.3695068359375, "pos_frac": 0.875, "sample": [-276.5950927734375, -104.31602478027344, 482.5911560058594, -168.42172241210938, 1.9955902099609375, 187.77005004882812, 218.39846801757812, -105.08932495117188, 88.59471130371094, -249.48782348632812, 184.22268676757812, 250.2811279296875, 113.43330383300781, 256.8328857421875, 70.28134155273438, 805.7757568359375, 40.65403747558594, 239.57220458984375, 148.24200439453125, 231.8194580078125, 799.0237426757812, 116.4122314453125, 221.27978515625, 30.684127807617188, 186.98187255859375, 141.69093322753906, 140.64694213867188, 72.88321685791016, 195.811279296875, 597.3138427734375, 31.359817504882812, 484.6047058105469, 241.03564453125, 635.1734619140625, 1175.3695068359375, 571.0925903320312, 174.3452911376953, 870.95361328125, 175.98573303222656, 112.09716796875, 473.62139892578125, 639.2987060546875, 90.18527221679688, 261.2125244140625, -105.65303039550781, 832.471923828125, 1127.140869140625, 196.1385955810547, 730.9093017578125, 8.832290649414062, 919.94921875, -51.04692077636719, 38.88838195800781, -51.62665557861328, 365.5805358886719, 338.0445251464844, 543.2153930664062, 395.3520202636719, 83.78366088867188, 109.84193420410156, 190.46095275878906, 12.446392059326172, 262.0141906738281, 137.6670684814453], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000472.npy"}
|
|
{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 187.09295654296875, "std": 310.81170654296875, "min": -452.1529846191406, "p10": -191.58651428222652, "median": 208.06275177001953, "p90": 427.0539581298828, "max": 1595.38134765625, "pos_frac": 0.734375, "sample": [-59.051612854003906, 229.44601440429688, 289.8770446777344, 337.6397705078125, -48.17440414428711, 363.531494140625, 80.07958984375, 210.224365234375, 373.4468994140625, 178.8290252685547, 428.306884765625, 422.5558166503906, 48.90043640136719, 274.71185302734375, -76.63723754882812, -114.04085540771484, 237.17941284179688, 101.68214416503906, 264.7164001464844, 305.403564453125, -208.15670776367188, 215.60549926757812, 334.17291259765625, 205.90113830566406, 125.34034729003906, 61.999244689941406, 263.0312194824219, 546.6312866210938, 191.07974243164062, 183.38229370117188, -310.3516540527344, 187.27032470703125, -265.6537170410156, 172.6436309814453, -355.9327087402344, -240.17738342285156, 1595.38134765625, 372.63909912109375, 374.4176025390625, 949.7130737304688, 280.9776611328125, 252.55523681640625, 424.1304626464844, 280.5709228515625, 191.0386199951172, 288.042236328125, -65.37630462646484, -452.1529846191406, 50.48127746582031, 347.60919189453125, 455.1588439941406, 386.779541015625, -152.9227294921875, -95.60482788085938, 167.00335693359375, -57.450660705566406, 524.2777709960938, -391.70306396484375, 351.2430419921875, 200.35244750976562, -105.03318786621094, 318.03466796875, -20.9564266204834, 549.33056640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000473.npy"}
|
|
{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 193.70852661132812, "std": 266.4131774902344, "min": -533.3684692382812, "p10": -107.5204689025879, "median": 175.90780639648438, "p90": 507.54735107421885, "max": 886.2493286132812, "pos_frac": 0.8125, "sample": [-286.564697265625, 259.983154296875, 14.89188003540039, 522.6847534179688, 408.5069274902344, 161.15432739257812, 594.2266235351562, 6.602460861206055, 247.90481567382812, 351.96044921875, 445.94879150390625, 478.3179931640625, 22.89088249206543, -1.5826644897460938, 106.54842376708984, 433.9421691894531, 121.91470336914062, -0.1584625244140625, -282.63372802734375, 261.09149169921875, 20.375961303710938, 402.0697937011719, -108.80835723876953, 9.13174057006836, 49.17346954345703, 474.3798828125, 203.67242431640625, 152.245361328125, 289.9237060546875, 80.50148010253906, 156.29824829101562, -70.73626708984375, 186.38864135742188, 80.3385238647461, 395.4854736328125, 231.0953826904297, 520.07421875, 98.71298217773438, 8.636701583862305, -128.44638061523438, 369.51165771484375, 83.63742065429688, 390.651123046875, 35.552001953125, 13.306800842285156, 165.42697143554688, 434.5039367675781, 824.6209106445312, -104.51539611816406, -331.3210754394531, 291.1507873535156, 447.757568359375, 245.08578491210938, 289.49365234375, 564.2752685546875, 207.87770080566406, 303.3297119140625, -168.667724609375, 422.2391662597656, -77.38067626953125, 108.95067596435547, 610.8344116210938, -533.3684692382812, 886.2493286132812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000474.npy"}
|
|
{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 272.6217041015625, "std": 254.68055725097656, "min": -311.34661865234375, "p10": -21.22152862548828, "median": 269.1395568847656, "p90": 523.1603240966798, "max": 994.9415283203125, "pos_frac": 0.828125, "sample": [-21.6065673828125, 54.14677810668945, 338.450439453125, 211.80596923828125, 8.06231689453125, 357.095703125, 912.10546875, 205.85885620117188, 733.1600952148438, 483.40447998046875, 220.53173828125, 221.64181518554688, 411.82171630859375, 44.35826110839844, 355.4073486328125, -27.053621292114258, 502.3232116699219, 89.72520446777344, 405.18096923828125, -40.81208038330078, 282.3851013183594, 361.733642578125, 380.1813659667969, 111.69718933105469, 33.18239974975586, 224.47378540039062, 294.0758056640625, 375.50927734375, 63.0224723815918, 397.347412109375, 84.11396026611328, 477.7868957519531, -311.34661865234375, 457.7439270019531, 331.00872802734375, 532.0905151367188, 269.72711181640625, 254.4601593017578, 156.6140899658203, 867.9625854492188, 270.8327331542969, -10.364189147949219, 189.2152099609375, -141.28233337402344, 407.4610595703125, 154.72946166992188, 102.65682220458984, -20.323104858398438, 268.552001953125, 337.29949951171875, -12.699676513671875, 358.5901794433594, 432.11114501953125, -12.240081787109375, 756.6130981445312, -55.34452819824219, 244.4345245361328, 994.9415283203125, 414.4328308105469, 655.7854614257812, 253.53082275390625, 492.709716796875, -86.49283599853516, 343.2612609863281], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000475.npy"}
|
|
{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 231.59701538085938, "std": 234.40133666992188, "min": -327.5909423828125, "p10": -22.857147789001452, "median": 195.37583923339844, "p90": 551.3712768554688, "max": 855.3955078125, "pos_frac": 0.875, "sample": [462.0427551269531, 28.965585708618164, 45.21942901611328, 158.18592834472656, 211.21376037597656, 376.952880859375, 676.490478515625, 302.11773681640625, 136.8299560546875, -157.245849609375, 562.462646484375, -41.555381774902344, 697.955322265625, 402.94873046875, 396.491943359375, 506.26666259765625, 359.26812744140625, 55.13311004638672, 173.61508178710938, 211.631591796875, 232.65066528320312, 310.449951171875, 11.838180541992188, 478.0369873046875, 267.9071960449219, 387.4041748046875, -327.5909423828125, 25.34270477294922, 212.12982177734375, 67.84212493896484, 12.834312438964844, 109.50027465820312, -57.081356048583984, 12.679206848144531, 366.31378173828125, 73.00868225097656, 67.49555206298828, -8.865346908569336, 319.6678161621094, 91.2489242553711, -28.853633880615234, 424.5498046875, 108.0446548461914, -104.38816833496094, 105.94011688232422, 171.5180206298828, 551.5866088867188, 550.8688354492188, 855.3955078125, 704.6201171875, 187.98741149902344, 96.43730163574219, 150.47314453125, 219.26318359375, 145.33445739746094, 96.61412048339844, 387.8925476074219, 436.06341552734375, 103.78140258789062, 303.4894104003906, 655.0408935546875, 397.7603759765625, 202.76426696777344, -117.77699279785156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000476.npy"}
|
|
{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 209.00033569335938, "std": 259.73150634765625, "min": -711.3925170898438, "p10": -52.01798706054686, "median": 196.14491271972656, "p90": 549.5196411132813, "max": 785.1243896484375, "pos_frac": 0.859375, "sample": [-35.1895751953125, 375.20050048828125, 380.2647399902344, -12.042533874511719, 533.7608032226562, 276.551513671875, 162.72488403320312, 282.79364013671875, 285.6931457519531, 316.0122375488281, 139.20761108398438, 75.02985382080078, 238.14210510253906, 431.7692565917969, 678.500244140625, 31.44352149963379, 362.8468017578125, 218.3836669921875, 763.6400146484375, 138.884521484375, 212.58450317382812, 46.789093017578125, 10.711593627929688, 79.6356430053711, -146.75027465820312, 273.65838623046875, 236.24301147460938, 92.35134887695312, 0.6374225616455078, -75.33584594726562, -277.568115234375, 263.8591003417969, -711.3925170898438, -179.43527221679688, 642.6922607421875, 134.40640258789062, 233.74282836914062, 226.12191772460938, 450.15240478515625, 44.980106353759766, 439.62506103515625, 36.63876724243164, 369.748779296875, 553.342041015625, 116.87100219726562, 167.06082153320312, 174.91201782226562, 575.7310791015625, 370.2575378417969, 130.11431884765625, 595.3264770507812, -176.14547729492188, 61.98963165283203, 179.705322265625, 0.6150970458984375, 540.6007080078125, 258.1122131347656, -59.23016357421875, 70.84121704101562, 37.285823822021484, 50.271324157714844, 365.6548767089844, 785.1243896484375, 529.86767578125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000477.npy"}
|
|
{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 248.9481201171875, "std": 242.1370391845703, "min": -226.43191528320312, "p10": -3.446876716613752, "median": 193.5533218383789, "p90": 581.4909851074219, "max": 1024.338623046875, "pos_frac": 0.890625, "sample": [-10.700178146362305, 165.7505645751953, 186.8631591796875, 135.49188232421875, 158.9046630859375, 275.1761474609375, 278.5381164550781, -42.25447082519531, 46.119590759277344, 23.20458984375, 129.2829132080078, 305.71392822265625, 22.22725486755371, 179.37901306152344, 44.708290100097656, 592.3705444335938, 422.0331115722656, 757.2883911132812, 189.8657989501953, 154.1761474609375, -67.14759826660156, 57.970703125, -226.43191528320312, 260.2253112792969, 300.2352600097656, 667.4423217773438, 372.252197265625, 171.26670837402344, 14.928802490234375, 253.7976837158203, 1024.338623046875, 478.0444641113281, 218.96690368652344, 230.0618896484375, 332.62115478515625, 440.7172546386719, 556.1053466796875, 307.761962890625, 367.7991027832031, -19.014266967773438, 406.0739440917969, 34.62158966064453, 77.66755676269531, -76.34131622314453, 595.0667724609375, 333.6103210449219, 178.4250030517578, 832.4486694335938, 159.8058319091797, 263.90081787109375, 442.5523986816406, 67.8008041381836, 176.02764892578125, 510.0706481933594, -46.60853576660156, 191.8900146484375, 395.2257385253906, 29.151016235351562, 816.4642333984375, 14.608650207519531, 200.3438720703125, 195.2166290283203, 365.0985107421875, 13.477493286132812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000478.npy"}
|
|
{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 213.73294067382812, "std": 273.55609130859375, "min": -723.9920654296875, "p10": -71.076594543457, "median": 224.251953125, "p90": 566.7043640136719, "max": 860.6651611328125, "pos_frac": 0.8125, "sample": [54.238990783691406, 59.437217712402344, 609.3885498046875, 347.1236267089844, 253.0222930908203, 479.5006103515625, -22.619556427001953, -93.07793426513672, 385.99267578125, -0.2909870147705078, 350.0021667480469, 59.00558090209961, -49.561485290527344, 321.772705078125, 601.60595703125, 263.82525634765625, 259.22430419921875, 59.924957275390625, 18.51439666748047, 56.400089263916016, -37.50448989868164, 441.99774169921875, 97.23133850097656, 240.55450439453125, 860.6651611328125, 733.8868408203125, 508.7115783691406, 383.1087951660156, 454.0377197265625, 103.14839935302734, 352.9316711425781, 207.94940185546875, 301.5879821777344, 568.038818359375, -250.34347534179688, 563.5906372070312, -220.32980346679688, 59.64844512939453, 131.03659057617188, 35.05009078979492, 289.66766357421875, 560.5232543945312, 403.86865234375, 305.9424133300781, 98.85957336425781, -723.9920654296875, 264.710693359375, 140.39068603515625, -80.29735565185547, 52.860870361328125, 124.5665283203125, -44.926185607910156, -138.06158447265625, -118.58106231689453, 242.01380920410156, 11.662513732910156, 372.1629638671875, 92.23370361328125, 128.82608032226562, 843.9774780273438, 243.21397399902344, 613.8675537109375, 317.7205810546875, 129.2705078125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000479.npy"}
|
|
{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 184.87521362304688, "std": 268.92584228515625, "min": -477.743896484375, "p10": -139.94107666015626, "median": 160.9864959716797, "p90": 491.14727478027345, "max": 953.1569213867188, "pos_frac": 0.765625, "sample": [-125.90150451660156, 63.11003875732422, -89.48098754882812, 93.87771606445312, 41.218017578125, 277.0416564941406, 235.89877319335938, 494.3959045410156, 153.81817626953125, 519.4004516601562, 261.6216125488281, 710.2269287109375, 77.49320983886719, -140.7562713623047, 43.728153228759766, -195.27316284179688, 66.03474426269531, 421.5440368652344, 176.20767211914062, 592.7802734375, 93.7785873413086, -146.50975036621094, 20.375473022460938, 331.24462890625, 125.9895248413086, 219.39112854003906, 407.58941650390625, 14.84715461730957, 412.6314392089844, -138.03895568847656, 462.6612548828125, 679.4645385742188, 386.3677978515625, 18.997478485107422, -387.5896301269531, 297.64691162109375, 344.32147216796875, -4.3458709716796875, 310.29351806640625, -11.522035598754883, -286.45892333984375, -477.743896484375, 482.3615417480469, 420.66436767578125, 953.1569213867188, 175.572509765625, 83.31143951416016, 483.567138671875, -8.846752166748047, 30.382539749145508, -2.8256683349609375, -146.88502502441406, 81.4894790649414, 168.15481567382812, 128.2257080078125, 180.28346252441406, 254.30694580078125, 58.73379135131836, 383.19842529296875, 579.4940185546875, 451.82781982421875, -31.18613624572754, 288.16278076171875, 468.4873962402344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000480.npy"}
|
|
{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 247.4109344482422, "std": 269.6463928222656, "min": -353.40960693359375, "p10": -49.76932296752928, "median": 190.0373306274414, "p90": 591.9353759765626, "max": 1051.740966796875, "pos_frac": 0.859375, "sample": [432.27764892578125, -353.40960693359375, 481.560791015625, 115.22051239013672, 185.73585510253906, 148.7764434814453, -56.61204528808594, 194.33880615234375, 315.3045959472656, 637.9879150390625, 48.58409881591797, -251.22854614257812, 73.62782287597656, -149.78289794921875, 171.5376434326172, 330.5890808105469, 19.646562576293945, 411.093994140625, 790.7406005859375, 45.58714294433594, 153.19219970703125, 345.91064453125, 243.99554443359375, 438.5306091308594, 256.83209228515625, 164.29605102539062, 93.2652587890625, 453.88616943359375, 82.53866577148438, -191.55844116210938, -97.51982116699219, 431.4471435546875, 146.07223510742188, -227.52252197265625, 522.1671142578125, 344.5694580078125, -33.80297088623047, 175.71583557128906, 516.2344970703125, 288.28936767578125, -11.692323684692383, 209.72348022460938, 372.4561767578125, 598.74609375, 79.34146881103516, 151.161865234375, 696.1259765625, 129.92947387695312, 1051.740966796875, 756.7650756835938, 382.07421875, 9.143081665039062, 185.08221435546875, 378.0041809082031, 94.29857635498047, 157.594482421875, 26.983211517333984, 223.3084716796875, 576.043701171875, 429.0402526855469, 787.6636962890625, 116.25523376464844, 284.614501953125, 451.780517578125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000481.npy"}
|
|
{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 166.3587188720703, "std": 270.72003173828125, "min": -677.5010375976562, "p10": -84.1823959350586, "median": 132.26885986328125, "p90": 484.67257690429693, "max": 1046.775390625, "pos_frac": 0.71875, "sample": [404.5130920410156, -98.56962585449219, 308.9786682128906, -32.497283935546875, 452.218994140625, 499.6839294433594, -160.96121215820312, 464.45318603515625, 386.83782958984375, 176.18948364257812, 158.6085205078125, 199.39309692382812, -0.7482147216796875, -14.722442626953125, 585.1494750976562, 413.44854736328125, 201.98634338378906, 115.38798522949219, 116.48002624511719, 68.19227600097656, -85.72178649902344, 69.39471435546875, 287.13690185546875, 491.5500183105469, -172.9144744873047, -2.6099853515625, -0.16950225830078125, 402.80999755859375, 263.7940368652344, 112.06355285644531, 325.2236022949219, 357.6423034667969, 468.6252136230469, 196.94381713867188, -80.59048461914062, 93.87588500976562, 295.167236328125, 32.251922607421875, 722.4115600585938, -324.0055847167969, 404.0653991699219, 67.27422332763672, -25.821603775024414, 12.066841125488281, 252.40089416503906, 57.7432861328125, -677.5010375976562, 314.96575927734375, 1046.775390625, -6.192573547363281, 9.123321533203125, 18.607315063476562, 8.65802001953125, 207.55076599121094, -372.14825439453125, 257.028564453125, 148.0576934814453, 617.0916748046875, -52.8646240234375, 178.0125274658203, -35.60858917236328, -56.082313537597656, 495.60089111328125, 81.25332641601562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000482.npy"}
|
|
{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 290.10418701171875, "std": 294.56207275390625, "min": -285.1842346191406, "p10": -36.01844482421874, "median": 262.69825744628906, "p90": 682.7653076171875, "max": 1383.448974609375, "pos_frac": 0.859375, "sample": [-58.17027282714844, 132.24270629882812, 555.2740478515625, 424.4561462402344, -17.163619995117188, 560.9820556640625, 176.80059814453125, 264.35357666015625, 564.9154663085938, 562.1945190429688, 437.31988525390625, 527.1489868164062, 375.3214111328125, -27.214035034179688, 13.294517517089844, 55.42485427856445, 768.7792358398438, 294.74151611328125, -204.2742919921875, 302.9097900390625, 106.55059051513672, -42.02214813232422, 775.5546875, 751.8201904296875, 204.65475463867188, 61.857261657714844, 845.168701171875, 382.6420593261719, 56.37635803222656, 578.6646728515625, -85.18849182128906, 722.3013305664062, 9.521141052246094, 684.8562622070312, 88.15663146972656, 677.8864135742188, 417.7865295410156, 382.2289123535156, 128.7298583984375, 436.79986572265625, 261.0429382324219, 348.37530517578125, 228.46270751953125, -39.79176330566406, 100.39276885986328, 169.66281127929688, 242.46121215820312, 130.80821228027344, 473.07861328125, -177.75965881347656, 337.39862060546875, -285.1842346191406, 148.674560546875, 1383.448974609375, 57.00856018066406, 213.05174255371094, 349.58489990234375, 271.7135314941406, 369.5950927734375, 36.12019348144531, 304.8188171386719, 531.370849609375, 177.00830078125, 41.643184661865234], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000483.npy"}
|
|
{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 251.25234985351562, "std": 249.2833709716797, "min": -224.8191375732422, "p10": 8.635432052612305, "median": 190.16104125976562, "p90": 589.0455810546875, "max": 1002.690673828125, "pos_frac": 0.90625, "sample": [351.1949462890625, 74.92811584472656, 578.3928833007812, 215.06903076171875, -78.44801330566406, 33.21107482910156, 279.11260986328125, 113.88924407958984, 665.5584106445312, 137.26980590820312, 62.531375885009766, 96.24503326416016, 293.0274658203125, 270.5829162597656, 492.8685302734375, 375.59716796875, 234.9364013671875, 111.9473876953125, -185.93408203125, -224.8191375732422, 129.15328979492188, 170.08619689941406, 339.51849365234375, 189.04873657226562, 1002.690673828125, 15.49432373046875, 202.34890747070312, 435.84991455078125, 17.14358139038086, -48.319793701171875, -1.5499534606933594, 212.2340087890625, 516.8040771484375, 671.0188598632812, 12.025947570800781, 242.08987426757812, 73.8648681640625, 161.2919158935547, 191.16273498535156, 8.062387466430664, 128.60012817382812, 51.633506774902344, 576.6895141601562, 306.5295715332031, 644.488037109375, 102.4808349609375, 495.9681701660156, 473.86236572265625, 73.31209564208984, -45.536399841308594, 9.972536087036133, 184.31964111328125, 593.6110229492188, 499.3013916015625, 189.1593475341797, 444.1650390625, 715.941650390625, 258.78179931640625, 572.7634887695312, 11.879003524780273, 694.0591430664062, 66.80482482910156, 47.45258331298828, 546.7303466796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000484.npy"}
|
|
{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 241.04345703125, "std": 283.1424255371094, "min": -488.4234619140625, "p10": -50.68816833496092, "median": 198.10443115234375, "p90": 652.417810058594, "max": 958.7407836914062, "pos_frac": 0.828125, "sample": [234.4440155029297, 342.47369384765625, 171.6868438720703, 169.06195068359375, 297.59259033203125, 156.77999877929688, 377.4039611816406, 1.187347412109375, 246.22084045410156, 362.2063293457031, 194.504638671875, 15.108024597167969, 479.5595397949219, 464.6318359375, 167.59255981445312, 836.1292724609375, -288.88641357421875, 1.971944808959961, 681.28369140625, 585.0640869140625, 445.8144836425781, 578.206787109375, 104.48971557617188, 300.9696044921875, 958.7407836914062, 855.7181396484375, -22.58489227294922, 69.90705108642578, 299.5268859863281, -147.96820068359375, -24.23657989501953, 64.6124267578125, -101.43706512451172, -3.1920242309570312, 265.1156005859375, 347.587158203125, 505.35040283203125, 344.44305419921875, 102.95260620117188, 360.07720947265625, -31.14044189453125, 113.93429565429688, 789.7706298828125, 130.5048828125, -160.768310546875, 297.3487548828125, 16.99397850036621, 201.7042236328125, 212.12380981445312, -488.4234619140625, 726.596923828125, 479.5652160644531, 52.552310943603516, -59.065765380859375, 102.63114929199219, -71.95558166503906, 709.44970703125, 76.65843200683594, 134.96328735351562, 282.3975830078125, 154.974365234375, 17.575916290283203, 501.72674560546875, 436.5539245605469], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000485.npy"}
|
|
{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 255.81402587890625, "std": 313.8223571777344, "min": -448.400146484375, "p10": -106.13300018310545, "median": 272.985595703125, "p90": 654.5919067382814, "max": 1070.2227783203125, "pos_frac": 0.796875, "sample": [726.859619140625, 268.9329528808594, 113.46345520019531, 109.21533203125, 278.1600341796875, 66.91596984863281, 104.28353881835938, -71.35335540771484, 550.7064208984375, -115.60202026367188, 131.01181030273438, 502.4259033203125, 523.7711181640625, -14.825508117675781, -448.400146484375, 945.4609375, 617.381591796875, 277.0382385253906, -208.5885772705078, 670.5391845703125, 720.7161865234375, -177.30564880371094, 482.2421875, 68.54150390625, -84.03861999511719, 13.774940490722656, 428.2059326171875, 407.111083984375, 23.138519287109375, 402.7450256347656, 494.427490234375, 67.83527374267578, -286.893798828125, -304.7169189453125, 65.80104064941406, 325.5782165527344, 37.43621826171875, 611.4847412109375, 455.999267578125, 372.6362609863281, 326.51751708984375, 395.84918212890625, 397.53582763671875, -80.85520935058594, 435.82757568359375, 873.833740234375, 546.2525634765625, 412.6886291503906, 309.4346618652344, 717.640869140625, -47.721107482910156, 250.14117431640625, 182.66552734375, 287.5372009277344, 412.32220458984375, 126.71626281738281, 67.48975372314453, 349.2956237792969, -312.5874938964844, 192.1730194091797, 242.1416473388672, -0.188079833984375, 65.04735565185547, 1070.2227783203125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000486.npy"}
|
|
{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 220.59432983398438, "std": 229.49984741210938, "min": -445.42535400390625, "p10": -106.01204071044918, "median": 252.1113739013672, "p90": 498.27135620117195, "max": 757.5179443359375, "pos_frac": 0.84375, "sample": [364.22100830078125, 484.73931884765625, 159.9204864501953, 145.48373413085938, 757.5179443359375, 446.1636962890625, 432.9888916015625, 288.2798767089844, 564.6847534179688, 36.41944122314453, 155.12908935546875, -143.62362670898438, 548.4570922851562, -166.88577270507812, 25.03130340576172, 394.50115966796875, 434.83441162109375, -153.43161010742188, 320.1552429199219, 677.1751098632812, 378.32220458984375, 309.7637634277344, -208.33221435546875, 132.77288818359375, 57.666175842285156, 211.03390502929688, 36.394371032714844, 283.6309814453125, 431.95794677734375, 20.30635643005371, 269.28338623046875, 14.148481369018555, 298.3702392578125, 355.72906494140625, 194.44659423828125, -169.3632354736328, 189.26995849609375, -445.42535400390625, 422.7626037597656, 336.0099792480469, 283.95068359375, 238.00115966796875, 266.2215881347656, -41.99536895751953, 273.752197265625, 318.7010192871094, -72.68467712402344, 175.2236328125, 323.0049133300781, 87.18099975585938, 159.09425354003906, 176.05670166015625, 336.4559631347656, 367.2347412109375, 130.64939880371094, 22.641448974609375, 156.48434448242188, 513.8285522460938, 504.07080078125, 507.3257751464844, -30.63494873046875, 430.2751159667969, -120.29519653320312, 222.98397827148438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000487.npy"}
|
|
{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 231.46987915039062, "std": 205.17987060546875, "min": -174.27699279785156, "p10": -12.363195991516108, "median": 226.38773345947266, "p90": 473.35776977539064, "max": 774.6635131835938, "pos_frac": 0.84375, "sample": [-2.1899032592773438, 97.45686340332031, 548.550048828125, 114.94932556152344, 649.6939697265625, -6.0183258056640625, 365.54327392578125, 25.300621032714844, 24.262542724609375, 219.06671142578125, 241.63946533203125, 180.68113708496094, 346.0874328613281, 370.23309326171875, 262.51806640625, 397.48992919921875, -2.543426513671875, -39.885215759277344, -174.27699279785156, 107.196044921875, 180.711181640625, 23.888710021972656, 340.3934326171875, 277.1354675292969, 79.75531005859375, 162.24703979492188, 168.13885498046875, 270.8659973144531, 774.6635131835938, 166.66746520996094, 227.83114624023438, 328.6416015625, -26.869483947753906, 343.94561767578125, 493.5318603515625, 224.94432067871094, 444.5377502441406, 430.2986755371094, 296.3611145019531, 259.9107360839844, 304.1125183105469, 467.02593994140625, 274.6344909667969, 40.62272644042969, 196.09584045410156, 130.02056884765625, 435.286865234375, 603.8174438476562, -130.91421508789062, -46.610450744628906, 207.79054260253906, 382.28302001953125, 313.9447326660156, 81.21231842041016, 346.9716796875, 463.5578308105469, -15.082426071166992, 270.5812683105469, 34.415645599365234, 37.29967498779297, -128.7155303955078, 476.0714111328125, 185.393798828125, 690.9017333984375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000488.npy"}
|
|
{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 213.39279174804688, "std": 231.56883239746094, "min": -168.72799682617188, "p10": 17.201992416381852, "median": 122.74484634399414, "p90": 524.744271850586, "max": 886.1926879882812, "pos_frac": 0.921875, "sample": [384.1700134277344, -87.08546447753906, 44.47135925292969, 74.15589904785156, 71.41800689697266, 126.35890197753906, 62.742488861083984, 501.2835998535156, 76.38066101074219, 174.47369384765625, 34.2463264465332, 47.66036605834961, 391.83892822265625, 134.6048583984375, 289.7236022949219, -41.08674621582031, 48.25141525268555, 287.23126220703125, 96.26995086669922, 599.033203125, 138.95851135253906, 60.92572021484375, 88.52363586425781, 213.54611206054688, 52.794864654541016, 90.70428466796875, 54.19898986816406, 66.04620361328125, 121.1393051147461, 175.1878204345703, 123.50372314453125, 57.04808044433594, 43.209014892578125, -2.255552291870117, 752.7778930664062, 360.54437255859375, 160.7015380859375, 89.42955017089844, 183.7242431640625, 121.98596954345703, 362.87158203125, 0.7714004516601562, 682.5065307617188, 39.29575729370117, 499.9920349121094, 345.0408020019531, 356.3055419921875, 702.3132934570312, 116.65556335449219, 79.20474243164062, 886.1926879882812, 281.0854187011719, 220.9361572265625, 845.052490234375, -104.4629135131836, 104.19873809814453, 532.1669921875, 273.1034240722656, 507.4245910644531, 77.323974609375, 9.89727783203125, 506.628662109375, 232.52481079101562, -168.72799682617188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000489.npy"}
|
|
{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 232.60052490234375, "std": 297.8207702636719, "min": -514.4844970703125, "p10": -99.10841293334961, "median": 187.14690399169922, "p90": 665.2663757324219, "max": 963.7933349609375, "pos_frac": 0.796875, "sample": [58.326385498046875, -514.4844970703125, 784.3070678710938, -96.9422378540039, 114.64596557617188, 44.13726043701172, 203.369384765625, 418.3819885253906, 456.10382080078125, -182.4246063232422, 71.93255615234375, -29.0294189453125, 557.1041259765625, 149.12136840820312, 425.72979736328125, -3.7704391479492188, -166.23291015625, 194.28021240234375, 478.83221435546875, -173.35879516601562, -7.871509552001953, 102.69403076171875, -41.01392364501953, 73.62751770019531, -267.2740478515625, 668.9757080078125, 160.06729125976562, 192.2247772216797, 212.70013427734375, 387.9208679199219, 61.40656280517578, 656.6112670898438, 118.9873046875, 498.2503356933594, 178.53671264648438, 83.2417221069336, 690.7647705078125, 292.87591552734375, 963.7933349609375, 6.535785675048828, 341.94525146484375, 200.05975341796875, 643.4063720703125, 753.2166748046875, 182.06903076171875, 766.5079956054688, -173.26708984375, 382.1929931640625, 31.827911376953125, 362.6192626953125, 200.69227600097656, -46.83570098876953, 845.8216552734375, 210.8162841796875, 343.1742248535156, 540.5355224609375, 9.3228759765625, -100.03677368164062, 135.12490844726562, 603.4428100585938, 49.748836517333984, 392.6983337402344, 224.498291015625, 163.76806640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000490.npy"}
|
|
{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 173.57102966308594, "std": 247.66006469726562, "min": -363.4009094238281, "p10": -121.50504684448241, "median": 174.07431030273438, "p90": 480.7712310791016, "max": 913.0147705078125, "pos_frac": 0.734375, "sample": [274.934814453125, -35.59004211425781, 504.170654296875, 171.29010009765625, 440.6690673828125, 195.9601593017578, 199.94216918945312, 53.46818542480469, 207.60997009277344, 40.7994384765625, 166.28160095214844, 233.39036560058594, -55.848411560058594, -22.380691528320312, 139.62661743164062, 45.4023323059082, 279.3026123046875, 228.72998046875, -84.09422302246094, 349.4541320800781, 176.8585205078125, -125.88118743896484, 521.1671752929688, -252.85287475585938, 182.2381134033203, 136.12806701660156, 470.4690246582031, -280.7890625, 396.6947021484375, 79.99810028076172, 654.1399536132812, 241.21258544921875, -363.4009094238281, 114.28255462646484, 94.80946350097656, 218.69625854492188, -12.534797668457031, 608.7402954101562, 252.7281036376953, 485.18646240234375, 206.46324157714844, -171.955078125, 826.9727172851562, -38.3189697265625, -111.29405212402344, 237.35650634765625, -63.8758544921875, 210.82064819335938, -32.12622833251953, 326.3498229980469, 73.86769104003906, -156.49046325683594, 463.6107177734375, 115.59300231933594, 88.90038299560547, 259.7001953125, 361.06640625, 401.0274963378906, 87.24494934082031, -25.90692138671875, 913.0147705078125, 104.65703582763672, 256.3223876953125, -155.46310424804688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000491.npy"}
|
|
{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 140.69276428222656, "std": 270.7463684082031, "min": -467.5142822265625, "p10": -212.76310729980463, "median": 101.3649673461914, "p90": 538.1568481445313, "max": 717.818115234375, "pos_frac": 0.703125, "sample": [66.79442596435547, 109.94950866699219, 247.40621948242188, -121.66353607177734, 175.10696411132812, 72.83097839355469, 22.523284912109375, 398.755615234375, 31.334186553955078, -59.1408576965332, -46.803565979003906, 310.9295959472656, -234.12689208984375, -35.11235809326172, -467.5142822265625, 547.8192138671875, 10.862266540527344, 336.16259765625, -6.937498092651367, 589.438232421875, -81.71682739257812, 199.27304077148438, 207.95925903320312, -40.882720947265625, 399.86376953125, 217.83209228515625, 341.43011474609375, -65.84660339355469, 123.83552551269531, 88.98918151855469, -27.285491943359375, 44.263954162597656, 54.635066986083984, -314.66009521484375, -162.91427612304688, 515.611328125, 658.7006225585938, 190.67578125, -403.1446838378906, 10.214958190917969, 717.818115234375, -359.1408386230469, 556.8245239257812, -6.630836486816406, -121.04383850097656, 487.1109619140625, 126.83663940429688, 91.69650268554688, 704.2532348632812, 92.78042602539062, 282.4396057128906, 62.85414123535156, 254.16644287109375, -255.05764770507812, 391.21136474609375, 266.1556091308594, 356.504638671875, 159.7237091064453, 63.3438606262207, 303.2002258300781, -250.41036987304688, 653.5405883789062, 182.19100952148438, 338.520751953125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000492.npy"}
|
|
{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 172.57424926757812, "std": 223.87423706054688, "min": -336.23760986328125, "p10": -97.77105026245114, "median": 156.73401641845703, "p90": 452.56164855957036, "max": 715.8389892578125, "pos_frac": 0.78125, "sample": [238.6576690673828, 45.46501922607422, 348.5343322753906, 203.74929809570312, -53.097198486328125, 715.8389892578125, 424.04901123046875, 8.451019287109375, 290.09527587890625, -336.23760986328125, 322.130859375, 77.80522155761719, 454.480712890625, 179.22955322265625, 134.3113250732422, 200.39453125, 54.906890869140625, 137.75888061523438, 184.1073455810547, 38.354095458984375, 141.26145935058594, 302.2051086425781, 365.6492004394531, 278.3988952636719, 251.63436889648438, 200.13204956054688, 400.175537109375, -170.60311889648438, -43.552818298339844, -114.2739028930664, 54.10003662109375, -282.23345947265625, 314.9207763671875, -8.697158813476562, 448.0838317871094, 44.028038024902344, 517.63134765625, 505.04815673828125, 52.816307067871094, -112.34803771972656, -1.345550537109375, 284.26434326171875, 433.9656982421875, 456.2474670410156, 188.51043701171875, 672.7418212890625, -63.758079528808594, 216.36981201171875, 714.7327880859375, 414.04180908203125, 175.04159545898438, 121.66529846191406, -121.51600646972656, 172.20657348632812, 19.589569091796875, -125.86234283447266, -39.73223876953125, 57.87862014770508, 377.75469970703125, 101.07115173339844, 57.95490264892578, 31.270294189453125, -31.71484375, 120.01216125488281], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000493.npy"}
|
|
{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 255.2093505859375, "std": 235.39366149902344, "min": -364.7923278808594, "p10": 11.900524139404304, "median": 226.93499755859375, "p90": 564.9315124511719, "max": 876.3247680664062, "pos_frac": 0.90625, "sample": [-1.5266780853271484, 165.27467346191406, 222.49501037597656, 326.0207214355469, 876.3247680664062, 725.0662841796875, -303.9883728027344, 512.8318481445312, 124.77865600585938, 85.48431396484375, 404.47314453125, 178.5676727294922, 181.0944061279297, 426.87481689453125, 92.44959259033203, 601.2680053710938, 381.02374267578125, -99.4114990234375, 397.9143371582031, 479.5255126953125, 90.40470123291016, -364.7923278808594, 373.9964294433594, 568.2445678710938, 499.75103759765625, 66.71278381347656, 429.6640930175781, 557.2010498046875, 203.8289031982422, 99.27325439453125, -109.91431427001953, 26.410003662109375, 233.28431701660156, 8.908683776855469, 632.4437255859375, 267.9390563964844, 389.7955322265625, 122.57335662841797, 164.7174835205078, 716.09814453125, 605.0702514648438, 150.0984344482422, -52.66905212402344, 52.42571258544922, 103.60315704345703, 296.59576416015625, 514.0780639648438, 319.6455383300781, 167.83395385742188, 267.33270263671875, 216.49986267089844, 18.881484985351562, 431.37652587890625, 164.91030883789062, 223.87966918945312, 72.0308837890625, 403.17974853515625, 349.9034423828125, 229.99032592773438, 331.8568115234375, 257.4322814941406, 249.60523986816406, 181.88186645507812, 24.875274658203125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000494.npy"}
|
|
{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 242.9305419921875, "std": 254.86520385742188, "min": -186.0829315185547, "p10": -39.20667572021482, "median": 222.27851104736328, "p90": 591.2710998535158, "max": 1012.0523681640625, "pos_frac": 0.8125, "sample": [308.2210388183594, -48.91337585449219, 295.958251953125, -11.28564453125, -0.2967529296875, 174.6549072265625, -186.0829315185547, 123.60372924804688, 248.2847900390625, 29.096939086914062, 211.93736267089844, 497.007080078125, 556.5761108398438, -181.25563049316406, 287.1781005859375, 56.633811950683594, -6.175676345825195, 516.163818359375, 79.35293579101562, 441.8068542480469, 116.24627685546875, -124.73342895507812, 297.4650573730469, 177.31268310546875, 256.3475646972656, 606.140380859375, 505.4104919433594, 290.0870666503906, 95.6325912475586, 362.2564697265625, 244.3360137939453, 354.44195556640625, 232.61965942382812, 210.92974853515625, 272.1104736328125, 624.5036010742188, 550.27392578125, -171.9517364501953, 706.490234375, 646.9998779296875, 376.4923400878906, 104.87699890136719, 69.1562271118164, 163.23092651367188, 144.22500610351562, -16.557708740234375, 1.8411102294921875, 264.6441955566406, 122.8182144165039, -120.22611999511719, 96.74713897705078, -2.7587966918945312, 296.5018310546875, 279.62939453125, 537.6138305664062, 131.7397918701172, 75.38809204101562, 488.75433349609375, -103.61543273925781, 823.6526489257812, 708.9200439453125, 296.55908203125, 1012.0523681640625, 150.4842529296875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000495.npy"}
|
|
{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 182.71994018554688, "std": 206.97659301757812, "min": -252.5174102783203, "p10": -49.82727966308593, "median": 163.9483184814453, "p90": 489.2760864257814, "max": 667.2852783203125, "pos_frac": 0.796875, "sample": [110.34963989257812, -99.8747787475586, 544.98583984375, 556.887939453125, -196.99456787109375, 633.78125, 149.1514892578125, 206.75543212890625, 424.1990661621094, 164.96292114257812, 122.83433532714844, 103.10762786865234, 274.8743591308594, 162.9337158203125, 132.636474609375, 174.07254028320312, 37.14605712890625, 260.63299560546875, -135.55548095703125, 414.3729248046875, 503.1507568359375, 348.8738708496094, 667.2852783203125, 254.85382080078125, 519.4114379882812, 101.68756103515625, -47.47392272949219, 290.8094482421875, 67.57546997070312, 109.21505737304688, 181.20458984375, -24.688886642456055, 197.11569213867188, 348.3323669433594, 27.410972595214844, -61.71870422363281, 276.2650146484375, -32.325035095214844, -252.5174102783203, 214.06504821777344, -138.56939697265625, -42.2900390625, 260.5384521484375, 38.442909240722656, 113.75434875488281, 533.9544677734375, 12.035415649414062, 360.9938049316406, 456.90185546875, 377.8081359863281, 51.7137451171875, -50.83586120605469, 380.9442138671875, 170.14244079589844, 122.89633178710938, -11.312820434570312, 194.95550537109375, 384.03643798828125, 69.57422637939453, 30.769264221191406, 289.1872253417969, 323.1966247558594, 55.34978485107422, -19.902587890625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000496.npy"}
|
|
{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 163.83831787109375, "std": 209.07127380371094, "min": -281.83514404296875, "p10": -91.24934692382811, "median": 135.49564361572266, "p90": 442.41669921875007, "max": 637.7316284179688, "pos_frac": 0.78125, "sample": [-56.09859085083008, 486.1070556640625, 362.25604248046875, 526.6544799804688, 9.739307403564453, 90.1182861328125, 412.84075927734375, -96.95530700683594, 9.030166625976562, -66.68931579589844, 208.95297241210938, 353.5177001953125, 637.7316284179688, 214.11146545410156, 36.744834899902344, 17.14654541015625, 349.5552673339844, 99.09016418457031, 340.9905090332031, 255.96585083007812, 69.81550598144531, 72.02619171142578, 431.68658447265625, 279.4635009765625, -37.65000915527344, 50.28038787841797, 324.7264709472656, -77.93544006347656, 59.27586364746094, -29.231430053710938, 215.0222625732422, 109.3516845703125, -48.05845260620117, 360.88531494140625, 84.8707275390625, 576.71826171875, 113.31417846679688, 588.8787841796875, 165.97962951660156, 41.772552490234375, 35.488304138183594, 236.6015167236328, 40.96592712402344, 447.01531982421875, -131.38584899902344, 171.89476013183594, 294.6686096191406, -22.771697998046875, 282.8310241699219, 277.2433776855469, 314.5221252441406, 116.62345886230469, 485.7589416503906, 154.36782836914062, -134.22804260253906, 180.84341430664062, -203.0375518798828, 242.90121459960938, -140.90293884277344, 106.56600952148438, 422.9593200683594, -281.83514404296875, 197.5543975830078, -150.99473571777344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000497.npy"}
|
|
{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 203.47848510742188, "std": 266.6463623046875, "min": -490.9993591308594, "p10": -101.3709114074707, "median": 194.4609832763672, "p90": 498.83049926757815, "max": 839.7923583984375, "pos_frac": 0.765625, "sample": [-204.7421112060547, 250.36868286132812, 253.59957885742188, 133.75643920898438, 96.77548217773438, 260.13201904296875, 500.66131591796875, 459.4932861328125, 208.43008422851562, 180.49188232421875, 257.4383544921875, 517.837646484375, 105.15460205078125, 415.089599609375, -333.64434814453125, 434.99932861328125, 363.01446533203125, 673.9537963867188, 20.80866241455078, 473.84576416015625, 77.32164764404297, 368.6228942871094, -67.29690551757812, 252.084716796875, -176.78665161132812, 151.15625, 156.04037475585938, 29.39575958251953, -44.664894104003906, 88.79450225830078, 95.88736724853516, 343.03118896484375, -490.9993591308594, 786.5715942382812, 221.45767211914062, 2.836181640625, -96.8478012084961, 405.03167724609375, 494.55859375, 460.26580810546875, 839.7923583984375, -56.39330291748047, -18.8677978515625, 97.16474914550781, 533.5387573242188, 287.86102294921875, -90.57748413085938, 250.090576171875, 362.5177917480469, 428.37042236328125, 160.5244903564453, -103.30938720703125, 125.06961059570312, 168.20367431640625, -173.62876892089844, -15.962005615234375, 425.00628662109375, 135.81793212890625, 378.3241271972656, 271.47027587890625, -59.00682830810547, 400.29010009765625, 771.2860107421875, -218.88446044921875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000498.npy"}
|
|
{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 255.59036254882812, "std": 270.7709045410156, "min": -447.98980712890625, "p10": -77.90676116943358, "median": 276.67698669433594, "p90": 561.3824401855469, "max": 871.4974365234375, "pos_frac": 0.828125, "sample": [585.2811279296875, 105.77662658691406, 222.39114379882812, -369.6429138183594, 34.423606872558594, 447.9947204589844, 300.3180847167969, 206.42767333984375, 42.49224090576172, -48.014373779296875, 245.06520080566406, 335.4676513671875, 120.69207000732422, 565.5802612304688, 363.32562255859375, 389.9194030761719, 415.66259765625, -81.28691101074219, 780.1502685546875, 229.7943878173828, 230.56863403320312, 128.96653747558594, 528.636474609375, 41.11821746826172, -447.98980712890625, 523.9197998046875, 274.06640625, 216.48167419433594, -156.63536071777344, 424.3497619628906, 166.6385498046875, -70.01974487304688, 379.5433654785156, 64.8487777709961, 252.1520233154297, 127.5020523071289, 440.191650390625, 675.9652099609375, 426.7408752441406, -160.61265563964844, 41.83311462402344, 384.3587646484375, 532.623291015625, 289.70916748046875, 541.9378662109375, 222.28250122070312, 520.2401733398438, 871.4974365234375, 279.2875671386719, 109.56536102294922, 340.3992614746094, 636.9754638671875, 297.9323425292969, 402.7013854980469, 38.746585845947266, 476.2359619140625, 477.50537109375, 282.0509033203125, -285.2436218261719, -54.01432800292969, -156.8149871826172, 551.5875244140625, -24.78775978088379, 622.9227905273438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000499.npy"}
|
|
{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 223.8900604248047, "std": 271.2594909667969, "min": -329.77484130859375, "p10": -64.25037002563477, "median": 221.22093200683594, "p90": 583.007470703125, "max": 1012.7659912109375, "pos_frac": 0.78125, "sample": [-7.415254592895508, -56.56489181518555, -64.32176971435547, 1012.7659912109375, -129.91757202148438, 347.3666687011719, 343.75323486328125, 314.1830749511719, 328.1296691894531, 222.4764404296875, 643.3442993164062, 391.455322265625, 435.459228515625, -64.08377075195312, -76.52766418457031, 219.96542358398438, 32.677490234375, -2.9305419921875, 124.33998107910156, 571.8964233398438, 555.8068237304688, -17.637313842773438, 316.682861328125, 766.442626953125, 72.68193817138672, 746.927490234375, -309.1786804199219, 471.50714111328125, 390.4761962890625, -329.77484130859375, 147.6099090576172, 702.5269165039062, 240.4108123779297, 32.47154998779297, 245.5084228515625, 334.9945068359375, -295.7178955078125, 147.3701171875, 336.5587158203125, -227.5497283935547, -61.74951934814453, 250.38194274902344, 186.71868896484375, 396.1714782714844, 8.952064514160156, 113.06694793701172, 305.3455505371094, 61.45397186279297, 158.4979248046875, 200.51467895507812, 672.5946044921875, 211.20309448242188, 460.0762634277344, 175.2135009765625, 358.05426025390625, 257.9176940917969, 433.34326171875, 245.44732666015625, 145.32762145996094, 259.6147766113281, 29.630353927612305, 587.7693481445312, -55.426727294921875, 14.675674438476562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000500.npy"}
|
|
{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 241.00148010253906, "std": 270.3808288574219, "min": -387.23858642578125, "p10": -85.09793243408203, "median": 222.19961547851562, "p90": 552.2373046875001, "max": 895.7033081054688, "pos_frac": 0.78125, "sample": [-79.35916900634766, 294.49560546875, 337.7996826171875, -169.817626953125, -6.219882965087891, 718.19287109375, 187.31028747558594, 431.4343566894531, 220.1194305419922, 119.40536499023438, 221.0220947265625, -23.414121627807617, 366.7671813964844, 193.83987426757812, 864.2120361328125, 534.5651245117188, 714.2772216796875, 232.82122802734375, 193.4167938232422, 514.3359375, 487.2694396972656, 325.6075439453125, 1.2093353271484375, 131.3128662109375, 520.4097290039062, -135.5067138671875, -89.59767150878906, 139.765380859375, 597.0889282226562, 262.0077209472656, -356.17034912109375, 209.25985717773438, 239.42237854003906, 198.271728515625, 296.3028564453125, 895.7033081054688, -27.799026489257812, 315.76812744140625, 223.37713623046875, 393.1756591796875, 395.3536376953125, 31.423545837402344, 153.72036743164062, -90.09567260742188, 436.0379638671875, 306.12322998046875, -79.615478515625, 136.2819061279297, 25.24300765991211, -387.23858642578125, 172.06216430664062, 421.8046875, -87.44755554199219, -45.7884521484375, 113.4976806640625, -22.10021209716797, 371.4231262207031, 480.81866455078125, 559.8110961914062, 288.0297546386719, 850.0029296875, 435.0791931152344, 301.72039794921875, 165.86398315429688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000501.npy"}
|
|
{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 244.53817749023438, "std": 187.71519470214844, "min": -176.3443603515625, "p10": 23.531704711914063, "median": 227.7038345336914, "p90": 505.1110382080078, "max": 787.008056640625, "pos_frac": 0.921875, "sample": [361.8970947265625, -103.95695495605469, 177.19412231445312, 360.285888671875, 27.62042236328125, 468.0650634765625, -63.628990173339844, 54.11224365234375, 503.983642578125, 312.90399169921875, 228.0294189453125, 289.6103820800781, 233.5164337158203, 462.1797790527344, 378.7669677734375, 40.5474853515625, 83.8519287109375, 108.02699279785156, -176.3443603515625, 170.13970947265625, 505.5942077636719, 398.7308349609375, 192.20126342773438, 202.14712524414062, 25.334747314453125, 209.41612243652344, 165.6525421142578, 184.1814727783203, 369.3565673828125, 297.060546875, 65.03852844238281, 528.8097534179688, 6.618675231933594, 253.89886474609375, 423.40557861328125, 496.6937561035156, 123.79975891113281, 563.9278564453125, -40.29344177246094, 68.79432678222656, 521.1215209960938, 207.4979705810547, 187.71820068359375, 199.41226196289062, 272.1051025390625, 385.8998718261719, 787.008056640625, -25.18886375427246, 236.77296447753906, 192.71893310546875, 399.592529296875, 127.65614318847656, 227.0068359375, 76.20706176757812, 568.5642700195312, 22.75897216796875, 227.3782501220703, 398.3722229003906, 519.1766357421875, 238.9901580810547, 185.98379516601562, 231.50210571289062, 247.21400451660156, 257.80328369140625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000502.npy"}
|
|
{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 229.31605529785156, "std": 237.5438995361328, "min": -179.33047485351562, "p10": -16.56918601989745, "median": 197.1225357055664, "p90": 542.4219177246094, "max": 1080.1534423828125, "pos_frac": 0.859375, "sample": [-6.743671417236328, -59.34947967529297, 658.3656616210938, 208.1136016845703, -52.17835998535156, -103.34722900390625, 430.68841552734375, 206.2745361328125, 92.0396499633789, 32.293251037597656, -179.33047485351562, 166.94459533691406, 30.595359802246094, 148.7528839111328, 748.9017333984375, 322.2929992675781, 260.9394836425781, 355.2942810058594, 104.25039672851562, 272.2700500488281, 2.9182281494140625, 129.2699737548828, 100.68815612792969, 81.08802032470703, 427.7054748535156, 170.04759216308594, -108.38561248779297, 423.30633544921875, 35.797080993652344, 274.66607666015625, 216.69342041015625, -20.780120849609375, 82.7236328125, 385.5682373046875, -4.070549011230469, 60.56342315673828, 687.2115478515625, 1080.1534423828125, 215.3416748046875, 366.84283447265625, 112.58949279785156, 521.1283569335938, 358.0848388671875, 169.8718719482422, -55.093017578125, 260.251953125, 42.854164123535156, 590.079833984375, 281.1502685546875, 298.364501953125, 41.159645080566406, 286.9493103027344, 371.5155029296875, 187.9705352783203, 771.9832153320312, 423.63909912109375, 53.302513122558594, 377.9040222167969, 62.140892028808594, 45.55975341796875, 161.2413330078125, 268.22161865234375, 551.5477294921875, 249.39443969726562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000503.npy"}
|
|
{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 272.0382995605469, "std": 260.30487060546875, "min": -298.60565185546875, "p10": -100.72038650512692, "median": 256.94859313964844, "p90": 584.8726806640626, "max": 956.0687255859375, "pos_frac": 0.84375, "sample": [121.10256958007812, 151.3548583984375, 432.4968566894531, 160.7194061279297, 34.02085876464844, 178.93270874023438, 349.49285888671875, -166.71099853515625, 956.0687255859375, 759.6275634765625, 838.6746826171875, -110.89582061767578, 502.2449645996094, 334.882568359375, -20.185897827148438, 75.52586364746094, 396.6800537109375, 156.00790405273438, 94.68675994873047, 125.60626220703125, 315.1983642578125, 257.403564453125, 308.7548828125, 64.26408386230469, 305.6438293457031, 164.1334686279297, -131.62310791015625, 435.6292419433594, 39.46746826171875, 242.93031311035156, -298.60565185546875, 565.546630859375, 220.88587951660156, 690.7388916015625, 498.96728515625, 420.67340087890625, 593.1552734375, 453.22991943359375, 172.86734008789062, 256.4936218261719, 372.71429443359375, 715.3282470703125, 428.5946350097656, 306.3572082519531, 316.5902099609375, -121.80941772460938, -173.9459228515625, 297.27459716796875, 114.67173767089844, 551.9556884765625, 210.82192993164062, 432.5190734863281, 488.6959533691406, 415.68292236328125, 221.54246520996094, 147.15679931640625, -76.97770690917969, 227.1209716796875, 717.0020751953125, 253.04733276367188, -122.5740966796875, 339.5601806640625, 473.5408020019531, -70.5062255859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000504.npy"}
|
|
{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 159.0027313232422, "std": 277.04144287109375, "min": -494.1295166015625, "p10": -148.52683105468748, "median": 142.19214630126953, "p90": 550.238284301758, "max": 791.051513671875, "pos_frac": 0.6875, "sample": [11.847129821777344, 499.50128173828125, -154.56002807617188, 418.57305908203125, -32.82478332519531, 569.76513671875, -44.80729675292969, 15.684432983398438, 82.55038452148438, 47.33290100097656, 598.2717895507812, 504.6756286621094, 791.051513671875, -7.6245269775390625, 352.74224853515625, 395.12371826171875, -2.7141571044921875, 4.31024169921875, 190.3912353515625, -96.70167541503906, 114.27085876464844, 485.06005859375, 166.8736572265625, 64.63290405273438, -247.15704345703125, -243.49981689453125, 117.51063537597656, 217.9168701171875, 451.2510986328125, 272.4028015136719, -79.23341369628906, 267.13958740234375, 436.4503173828125, -194.14120483398438, 88.1760482788086, 608.59521484375, 279.4936828613281, 204.2400360107422, -12.882972717285156, 245.78627014160156, -254.93310546875, -494.1295166015625, 190.2212371826172, 171.4547576904297, 20.299102783203125, 239.18896484375, 755.0700073242188, 309.1650085449219, -47.19297409057617, 655.0233154296875, -115.35395812988281, -134.44937133789062, -133.05484008789062, 650.0283813476562, 249.8324432373047, -108.34690856933594, 226.763916015625, -352.0318603515625, 25.474853515625, 243.6569366455078, 453.240478515625, 21.702651977539062, 242.07064819335938, -22.999523162841797], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000505.npy"}
|
|
{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 246.73110961914062, "std": 252.24713134765625, "min": -318.23187255859375, "p10": -62.256898498535136, "median": 235.6341552734375, "p90": 563.2765563964845, "max": 903.3084106445312, "pos_frac": 0.8125, "sample": [-318.23187255859375, 36.10040283203125, 341.7423095703125, -39.14836120605469, 122.47384643554688, 221.8771514892578, -92.80987548828125, 303.96942138671875, 485.6318359375, 48.069881439208984, 448.6292419433594, 127.56523132324219, 145.4091033935547, 293.0596923828125, -120.57845306396484, 743.3709716796875, 365.95941162109375, 72.46945190429688, 197.14044189453125, 300.6023864746094, 213.10870361328125, 595.2239990234375, 381.8213806152344, 102.33702850341797, 903.3084106445312, -122.29995727539062, -45.49285125732422, 289.0652770996094, 435.31573486328125, 330.658935546875, 175.67823791503906, 542.3269653320312, 426.0057373046875, 95.34489440917969, 477.6181640625, -113.57979583740234, 378.3767395019531, 222.39031982421875, 408.9820251464844, 645.3455810546875, 422.13507080078125, 364.6078796386719, -33.27226257324219, 319.1468505859375, 162.45030212402344, -69.44149017333984, 248.87799072265625, 62.432762145996094, 694.7562255859375, 525.042236328125, 478.726318359375, -1.597726821899414, 125.2945556640625, 546.0661010742188, 570.6524658203125, 708.5564575195312, 8.547439575195312, 258.9345703125, 123.26591491699219, 7.209785461425781, -201.810546875, 182.5999755859375, 286.93695068359375, -24.165260314941406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000506.npy"}
|
|
{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 200.84158325195312, "std": 267.6584167480469, "min": -394.88970947265625, "p10": -54.864159393310544, "median": 178.73907470703125, "p90": 498.3138000488283, "max": 1182.728759765625, "pos_frac": 0.78125, "sample": [36.730247497558594, 143.7225799560547, -43.54579162597656, -394.88970947265625, 719.484619140625, 180.83660888671875, 103.59812927246094, 315.2839050292969, -69.26119232177734, 157.64682006835938, 28.469345092773438, 282.4175109863281, -116.31455993652344, 315.13909912109375, 391.7906799316406, 176.64154052734375, -209.4103240966797, 228.2164306640625, 90.68135070800781, 1182.728759765625, 242.1035614013672, 245.01190185546875, -1.5776100158691406, 263.8995666503906, 395.17193603515625, 324.25836181640625, 255.98779296875, 333.73394775390625, -32.76736831665039, 439.99462890625, 265.60723876953125, 17.66021728515625, -53.73375701904297, 40.95649719238281, -25.398462295532227, 172.06460571289062, 209.43402099609375, 373.8082580566406, 232.5965576171875, 24.37615203857422, 249.1871795654297, 19.153564453125, 55.113433837890625, -47.586158752441406, 1016.5791015625, 544.9813842773438, 81.7317123413086, 253.52845764160156, -117.41970825195312, 463.767578125, 443.3437805175781, 1.294546127319336, -37.7099609375, 513.1193237304688, 603.9104614257812, 421.4528503417969, 546.22314453125, 396.2086486816406, 279.11480712890625, 46.443504333496094, 23.91687774658203, -98.21951293945312, -55.34861755371094, 7.921472549438477], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000507.npy"}
|
|
{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 192.35910034179688, "std": 279.5169372558594, "min": -515.6671142578125, "p10": -77.92820739746094, "median": 146.2189483642578, "p90": 520.1966918945312, "max": 1105.44970703125, "pos_frac": 0.71875, "sample": [145.6614990234375, 97.1431884765625, -259.72918701171875, 377.16595458984375, 302.1881103515625, -3.96527099609375, -39.283206939697266, 1105.44970703125, 376.2615051269531, 512.3854370117188, 305.9546203613281, 605.1381225585938, 170.24029541015625, 867.0013427734375, 432.9473876953125, 289.1859130859375, 278.4815368652344, -294.451171875, 614.27099609375, 341.39630126953125, -43.712886810302734, 104.47645568847656, 138.1520538330078, -37.714080810546875, 703.197021484375, 148.27127075195312, 146.77639770507812, -31.55559539794922, -49.60331726074219, 166.5697021484375, 30.330337524414062, 244.17434692382812, -6.2898101806640625, -22.262248992919922, 767.7630615234375, -515.6671142578125, 16.183616638183594, 112.44567108154297, 103.15695190429688, 313.7391052246094, 55.129608154296875, 332.4371643066406, 68.5745849609375, 144.43389892578125, 352.70416259765625, 232.60638427734375, 417.912353515625, 182.6982879638672, 504.1520690917969, 455.5024108886719, 317.99151611328125, -6.072113037109375, -77.83613586425781, 523.5443725585938, 364.66571044921875, -97.28095245361328, -58.48541259765625, 3.9652099609375, -129.59205627441406, 190.20254516601562, -77.96766662597656, 117.80388641357422, -82.51561737060547, 64.5335922241211], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000508.npy"}
|
|
{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 190.74887084960938, "std": 245.2307891845703, "min": -261.2934875488281, "p10": -72.72847518920895, "median": 137.3287811279297, "p90": 543.4872192382813, "max": 990.7437133789062, "pos_frac": 0.828125, "sample": [72.98828887939453, -37.86663055419922, -261.2934875488281, 547.7821044921875, 147.10198974609375, 253.7712860107422, 24.591163635253906, 22.419960021972656, 367.0272216796875, 242.09530639648438, 414.71160888671875, 240.3670654296875, 213.8927001953125, 30.102907180786133, 80.54789733886719, 68.86683654785156, 753.8575439453125, -259.5091552734375, -211.04583740234375, 300.412109375, 548.3010864257812, 271.9466552734375, 128.46395874023438, 99.59525299072266, 338.5738525390625, 130.77207946777344, 120.3531494140625, 169.94979858398438, 324.0570068359375, 318.6466064453125, 168.6602325439453, -150.7835235595703, -9.672710418701172, -99.95332336425781, -29.2691650390625, 746.2184448242188, 260.42718505859375, 63.91102600097656, 228.3875732421875, -34.21589660644531, 91.46102905273438, 226.3234100341797, 80.40312194824219, 128.86669921875, 36.97956848144531, -87.66926574707031, 624.9561157226562, 533.4658203125, 143.88548278808594, 990.7437133789062, -126.1725082397461, 159.49493408203125, 33.34861373901367, 64.37237548828125, 311.4902038574219, 95.92140197753906, 75.15808868408203, 34.641448974609375, 236.128662109375, 331.3934326171875, 530.41162109375, 673.7901000976562, 102.2513427734375, 311.09307861328125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000509.npy"}
|
|
{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 238.2543182373047, "std": 227.6627197265625, "min": -387.6564636230469, "p10": -48.81079292297363, "median": 227.20343780517578, "p90": 544.3659301757813, "max": 646.9194946289062, "pos_frac": 0.796875, "sample": [-54.18587875366211, 323.435791015625, 644.94873046875, 359.0239562988281, 181.66311645507812, 172.44969177246094, 500.60321044921875, 530.7653198242188, -109.98426818847656, 359.35595703125, 337.10009765625, 291.6246643066406, -1.5642261505126953, 206.2154541015625, 550.1947631835938, 397.2058410644531, 401.58721923828125, -387.6564636230469, -82.91472625732422, 500.1112976074219, 295.6763610839844, 232.8634490966797, 261.73687744140625, 93.3634033203125, 188.13043212890625, 310.9117126464844, 410.1036682128906, 646.9194946289062, 502.8539123535156, -225.10198974609375, 565.2763671875, 422.41217041015625, -46.42514419555664, 141.85162353515625, 448.45880126953125, 195.50045776367188, 221.54342651367188, 170.7209930419922, -39.6347541809082, 157.7607421875, 127.41621398925781, 266.2506408691406, 488.0821228027344, 289.657958984375, -11.539871215820312, 2.9132308959960938, 143.7659454345703, 206.1812744140625, 176.56947326660156, 248.2882080078125, 592.9475708007812, -61.18529510498047, 309.29986572265625, 572.4014892578125, -13.328140258789062, 401.0096740722656, 204.77699279785156, 484.5503234863281, -49.833213806152344, -9.380531311035156, 17.36448097229004, 615.53369140625, 147.18228149414062, 24.450008392333984], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000510.npy"}
|
|
{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 240.81019592285156, "std": 307.2862548828125, "min": -708.3983764648438, "p10": -61.847165679931635, "median": 187.76668548583984, "p90": 629.1882446289064, "max": 1142.5740966796875, "pos_frac": 0.8125, "sample": [97.55445098876953, 748.3285522460938, 73.74655151367188, 209.85365295410156, 241.30572509765625, 144.00709533691406, 594.0435791015625, 707.6660766601562, 140.2073516845703, 188.1082763671875, 354.6298522949219, 454.1490478515625, 590.8603515625, 654.4193115234375, 13.032487869262695, 854.7703247070312, -708.3983764648438, 483.2113037109375, 124.61138916015625, 495.2728271484375, 498.6243591308594, 437.0337219238281, 61.40516662597656, 411.09063720703125, 156.28070068359375, 93.87885284423828, -393.4315185546875, 288.753173828125, -57.387939453125, 463.2769775390625, 485.6163330078125, 113.79768371582031, -27.185060501098633, 248.2118682861328, 237.82159423828125, 299.2530822753906, 461.28704833984375, 87.47958374023438, -51.50738525390625, 187.4250946044922, 427.227294921875, 241.23104858398438, 168.84767150878906, -170.32583618164062, -67.5057601928711, 644.250244140625, 329.4639587402344, -6.43250846862793, -63.758262634277344, 89.26954650878906, -181.07351684570312, 159.0048828125, 295.60699462890625, -40.910316467285156, 119.77059936523438, 305.87213134765625, 186.76495361328125, 5.627967834472656, 1142.5740966796875, 88.83036804199219, 304.5404968261719, -156.9752197265625, 175.94894409179688, 950.8995361328125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000511.npy"}
|
|
{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 255.02108764648438, "std": 232.19998168945312, "min": -168.1901397705078, "p10": 1.023250961303713, "median": 224.0682373046875, "p90": 642.7748596191407, "max": 779.515625, "pos_frac": 0.90625, "sample": [634.6649169921875, 656.7305908203125, 359.56524658203125, -110.19525146484375, 29.839950561523438, -96.62594604492188, 646.2505493164062, 130.9302978515625, 28.716123580932617, 92.80550384521484, 49.29042053222656, 440.0576477050781, 2.9819393157958984, 263.96466064453125, -86.07290649414062, 91.75953674316406, 398.74945068359375, 303.65765380859375, 167.08148193359375, 0.18381309509277344, 60.27960968017578, 408.1089172363281, 23.085756301879883, 397.1815185546875, 779.515625, 381.90447998046875, 178.5278778076172, 256.30694580078125, 75.0744857788086, 420.2633972167969, 194.24249267578125, 299.8655700683594, 226.43399047851562, 364.39495849609375, 187.29249572753906, 139.02259826660156, 189.76438903808594, 221.70248413085938, -90.22907257080078, 558.21826171875, 66.8038330078125, 472.567138671875, 356.7493591308594, 654.042236328125, 18.054092407226562, -168.1901397705078, 397.4607849121094, 57.05742263793945, 40.98118591308594, 271.4557800292969, 694.1412353515625, 30.597938537597656, 186.0081329345703, 473.4606018066406, 144.93014526367188, 325.4626770019531, 391.6091003417969, 118.38639068603516, 338.2325439453125, 403.6419677734375, -20.550128936767578, 666.54248046875, 719.7201538085938, 406.89166259765625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000512.npy"}
|
|
{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 179.1253662109375, "std": 262.0176086425781, "min": -432.66058349609375, "p10": -98.87609405517578, "median": 181.23158264160156, "p90": 546.5902587890627, "max": 833.7711181640625, "pos_frac": 0.71875, "sample": [514.0587158203125, 260.12310791015625, 188.22996520996094, -10.671501159667969, 176.43426513671875, 242.37876892089844, 41.00714874267578, 170.1996612548828, -254.8042449951172, 564.48291015625, 349.8112487792969, 42.736083984375, 407.182373046875, 108.97953033447266, -32.12036895751953, 419.62744140625, 66.75302124023438, -94.87957763671875, 171.0681610107422, 253.00604248046875, 281.96881103515625, 495.99658203125, 635.2333984375, 315.84466552734375, 144.00897216796875, 98.95684814453125, 259.29986572265625, -95.88568115234375, 284.04693603515625, -31.880008697509766, 128.628662109375, 267.290283203125, 366.9515686035156, 485.43865966796875, 220.0687255859375, -21.553955078125, 41.76148986816406, -342.306640625, 322.4490966796875, 833.7711181640625, -100.15769958496094, 49.13243865966797, 252.5713348388672, -342.27410888671875, 581.9418334960938, -46.9677734375, -114.22454071044922, -23.412118911743164, 560.5323486328125, 563.650390625, -382.24322509765625, -30.781356811523438, 573.0933837890625, 462.8302307128906, -20.28240203857422, 228.22146606445312, -66.07559967041016, 159.2908172607422, 186.02890014648438, 223.19252014160156, -432.66058349609375, 461.611083984375, 149.90277099609375, 297.41156005859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000513.npy"}
|
|
{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 193.9813995361328, "std": 263.1480712890625, "min": -263.85174560546875, "p10": -125.78681030273437, "median": 176.77396392822266, "p90": 554.3972106933594, "max": 973.189697265625, "pos_frac": 0.765625, "sample": [-227.61495971679688, -129.72802734375, 163.8613739013672, -133.5362548828125, 541.9204711914062, 155.5128173828125, 406.5547180175781, 393.43896484375, 973.189697265625, 575.5167236328125, 61.46479797363281, 239.49302673339844, -223.23446655273438, 211.347900390625, 464.71270751953125, -54.05406188964844, -96.55232238769531, 201.37002563476562, 593.3311157226562, 699.5623168945312, -189.02943420410156, 171.37493896484375, 312.3207702636719, -45.53455352783203, 25.50934600830078, 206.87954711914062, 110.73585510253906, 75.44378662109375, 248.95797729492188, 41.70763397216797, 450.6308898925781, 295.36749267578125, -32.519798278808594, 68.5147705078125, 44.154579162597656, 46.29273986816406, 627.7537841796875, -150.97683715820312, 376.98345947265625, -103.4541015625, 182.17298889160156, -263.85174560546875, -106.47126007080078, 275.28802490234375, 205.92471313476562, 31.633220672607422, -116.59063720703125, -74.47834777832031, 63.18859100341797, 420.2791748046875, 253.576416015625, 395.5218505859375, 120.10075378417969, 515.191162109375, 460.40484619140625, 143.8173828125, 263.77044677734375, 205.40628051757812, 559.744384765625, 247.8928680419922, 399.8194580078125, 18.991931915283203, 62.141845703125, 753.6658935546875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000514.npy"}
|
|
{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 173.0281982421875, "std": 230.31959533691406, "min": -555.2470092773438, "p10": -93.57955780029296, "median": 155.08379364013672, "p90": 463.3872436523439, "max": 629.0867919921875, "pos_frac": 0.8125, "sample": [27.005359649658203, 108.60423278808594, 352.91314697265625, 612.9996337890625, 177.16122436523438, -555.2470092773438, 417.19464111328125, 272.78729248046875, 16.98480224609375, -203.9583282470703, -99.71623229980469, 86.18402099609375, -8.50421142578125, 431.4532470703125, 373.4544677734375, 238.94406127929688, 19.457687377929688, 431.39837646484375, -5.07313346862793, 0.870025634765625, 258.81622314453125, -99.72428894042969, 146.66627502441406, 354.05169677734375, 148.8126678466797, -16.651565551757812, 498.3365478515625, -79.26065063476562, 196.94882202148438, 127.33605194091797, 566.3795776367188, 23.915185928344727, -30.779016494750977, 209.08160400390625, 357.90411376953125, 424.76861572265625, 348.5303955078125, 10.559196472167969, 629.0867919921875, -111.71969604492188, 139.57472229003906, 556.3665771484375, -301.77386474609375, 25.257972717285156, -217.44686889648438, 203.21734619140625, 68.10826873779297, 594.8510131835938, 64.10578918457031, 72.44723510742188, 214.22036743164062, 477.0732421875, 98.92533111572266, 124.4225845336914, 357.8347473144531, 412.1929016113281, 161.35491943359375, 40.0960693359375, 249.1099090576172, 339.97821044921875, 199.330322265625, 120.6661376953125, 181.34503173828125, 234.57455444335938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000515.npy"}
|
|
{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 246.72903442382812, "std": 224.00657653808594, "min": -249.1452178955078, "p10": -55.144797515869136, "median": 252.900634765625, "p90": 544.7236877441408, "max": 793.3154907226562, "pos_frac": 0.84375, "sample": [118.30988311767578, 157.60867309570312, 329.4941711425781, -47.82170867919922, 482.710205078125, 355.94390869140625, 353.9356384277344, -87.51515197753906, 233.23048400878906, 441.95068359375, 554.15576171875, 250.23214721679688, 255.56912231445312, 378.75299072265625, 144.4536590576172, -98.16757202148438, 342.0355529785156, 199.4142303466797, 80.36663818359375, 649.8341064453125, -249.1452178955078, 107.27166748046875, -58.28326416015625, 101.34889221191406, 268.249755859375, 257.80523681640625, 53.909568786621094, 633.36279296875, 743.8807373046875, 158.5143585205078, 75.6148681640625, 630.8271484375, 793.3154907226562, 304.6541748046875, 522.7155151367188, 277.4306945800781, 300.71905517578125, 51.330501556396484, 188.6217803955078, 129.72323608398438, 189.07896423339844, -106.85409545898438, 433.607666015625, 162.21914672851562, -40.716670989990234, 505.7379150390625, 193.56797790527344, 230.34100341796875, 60.924888610839844, 316.696533203125, 359.1539001464844, 397.20025634765625, 60.132232666015625, 284.28204345703125, 399.45123291015625, -109.13036346435547, -82.78146362304688, 68.61944580078125, 362.4639892578125, 350.4293212890625, 413.73333740234375, 558.2392578125, 430.6899719238281, -32.787166595458984], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000516.npy"}
|
|
{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 242.6020050048828, "std": 265.0799255371094, "min": -244.21055603027344, "p10": -55.64597320556639, "median": 221.65390014648438, "p90": 585.1141479492188, "max": 1078.060302734375, "pos_frac": 0.84375, "sample": [176.23025512695312, 485.800048828125, 245.64413452148438, 72.33248901367188, 599.2952270507812, 80.06387329101562, 164.06922912597656, 273.1734924316406, 344.06536865234375, -244.21055603027344, 353.5803527832031, 890.6856079101562, 306.4967346191406, 247.55465698242188, 265.02093505859375, -39.370140075683594, 197.95913696289062, 271.6026611328125, 234.5124969482422, 573.8995361328125, 589.92041015625, 505.8560791015625, 39.299781799316406, 0.68426513671875, 50.447471618652344, -230.20608520507812, 96.01262664794922, 217.01571655273438, -24.007537841796875, 1078.060302734375, 189.0521697998047, 71.91661071777344, -23.997758865356445, 560.7101440429688, 29.8597412109375, 421.556640625, -128.4566650390625, 371.0485534667969, -62.62133026123047, 190.13870239257812, 420.42169189453125, 32.560157775878906, 229.43064880371094, 312.3123474121094, 405.81011962890625, 294.24114990234375, 391.9495544433594, 226.29208374023438, 16.99787139892578, 848.4224853515625, 120.28406524658203, 389.69256591796875, 267.56591796875, 715.9752197265625, -67.654296875, 677.3826904296875, 80.51713562011719, 192.36428833007812, -116.7109146118164, 180.99403381347656, -135.66543579101562, 431.41748046875, 146.52743530273438, 24.704612731933594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000517.npy"}
|
|
{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 209.58453369140625, "std": 262.3077697753906, "min": -389.29315185546875, "p10": -92.41258239746094, "median": 199.81842803955078, "p90": 614.3900634765627, "max": 811.531494140625, "pos_frac": 0.796875, "sample": [19.725154876708984, 626.414306640625, 289.57904052734375, 111.53172302246094, 213.3837127685547, 257.23870849609375, 270.0570983886719, 374.18365478515625, 39.19316482543945, 103.67668151855469, 224.1400604248047, 502.69854736328125, 108.55667114257812, -14.389694213867188, 295.2031555175781, 73.3138656616211, 202.18807983398438, 131.41888427734375, 24.219680786132812, 487.48736572265625, 582.8045654296875, 1.6493453979492188, -148.85910034179688, 197.4487762451172, -94.39923095703125, 291.59320068359375, 648.0396118164062, 146.2049560546875, 270.4515380859375, 676.440673828125, 667.2523193359375, -21.310317993164062, 655.9776611328125, 586.33349609375, 65.27701568603516, 359.0058288574219, 168.04507446289062, -45.000885009765625, 360.9191589355469, 444.2935485839844, 142.7982940673828, -87.77706909179688, 341.2892150878906, 101.53633880615234, 230.60305786132812, -86.41187286376953, -180.6138153076172, -389.29315185546875, 321.9247741699219, 368.9068298339844, -23.146987915039062, 37.83731460571289, -267.54974365234375, 165.93167114257812, -310.33599853515625, 811.531494140625, 391.0782165527344, 68.74786376953125, -136.60086059570312, 716.5960083007812, 252.1415252685547, 116.54774475097656, 429.0174255371094, 246.6646270751953], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000518.npy"}
|
|
{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 185.1796417236328, "std": 249.2099609375, "min": -340.5010070800781, "p10": -112.15334548950192, "median": 172.05296325683594, "p90": 547.923098754883, "max": 953.5093994140625, "pos_frac": 0.796875, "sample": [28.222808837890625, 198.82823181152344, 193.07655334472656, -266.24334716796875, 658.5064086914062, -150.6890869140625, 225.18699645996094, 568.54931640625, 573.8734741210938, 314.3492431640625, 135.5217742919922, 499.7952575683594, -80.7552490234375, 609.9774780273438, 244.72360229492188, -272.0807800292969, 665.1080932617188, 335.2736511230469, 100.8175277709961, 108.79301452636719, 230.76268005371094, 44.32926940917969, 52.10327911376953, 119.62303161621094, 131.31134033203125, 90.93435668945312, 188.29733276367188, 124.97917175292969, 238.2467041015625, 71.17766571044922, -4.703857421875, 468.2421875, 173.90390014648438, -72.2994384765625, 255.14602661132812, 177.83755493164062, 216.38914489746094, -1.9011001586914062, -125.60967254638672, 106.31642150878906, 170.2020263671875, -16.255218505859375, 452.5159912109375, 177.57568359375, -340.5010070800781, 463.789794921875, 953.5093994140625, 28.350807189941406, 118.9903564453125, 122.41221618652344, 183.8950653076172, 714.1800537109375, -128.8767547607422, 442.6854248046875, -30.380615234375, 86.88545989990234, 416.8521423339844, 12.927555084228516, 204.45556640625, 243.78170776367188, 302.2633361816406, 87.12762451171875, 221.62777709960938, -212.43817138671875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000519.npy"}
|
|
{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 204.956298828125, "std": 232.8690185546875, "min": -291.9980773925781, "p10": -27.868736457824703, "median": 139.9597625732422, "p90": 513.8541137695313, "max": 922.817626953125, "pos_frac": 0.84375, "sample": [37.75328826904297, 19.160400390625, 780.616943359375, 72.04786682128906, 280.9173889160156, 79.25146484375, 349.5017395019531, 11.119293212890625, 8.832267761230469, 176.32382202148438, 167.41395568847656, -29.257781982421875, 461.2149353027344, 126.52586364746094, -24.62763023376465, 135.14266967773438, 538.622314453125, 922.817626953125, -98.57992553710938, 114.53599548339844, 513.09814453125, 756.9141235351562, -291.9980773925781, 216.03887939453125, -72.23001098632812, 501.7608337402344, 109.84822845458984, 559.13623046875, 183.79840087890625, 308.96710205078125, 70.37237548828125, 142.2055206298828, 561.4061889648438, 95.37791442871094, 108.60076904296875, 201.52601623535156, 514.1781005859375, 332.4643249511719, 384.04052734375, -8.527618408203125, 321.7348937988281, 142.95114135742188, 137.71400451660156, 174.55416870117188, 61.856689453125, 505.5979309082031, -45.823890686035156, 293.1713562011719, 469.4529113769531, 11.234588623046875, 160.059814453125, 49.51646423339844, -75.40853881835938, 195.56240844726562, 111.42019653320312, 500.4184875488281, -12.53857421875, 249.64300537109375, -53.45610046386719, 285.6404724121094, 135.77865600585938, 18.901016235351562, 34.364410400390625, 128.54742431640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000520.npy"}
|
|
{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 201.8736572265625, "std": 232.110107421875, "min": -199.82272338867188, "p10": -46.62580337524413, "median": 178.7651824951172, "p90": 530.1575408935548, "max": 930.0481567382812, "pos_frac": 0.75, "sample": [377.2547607421875, 582.7745361328125, 252.6663055419922, 23.33568572998047, -73.45337677001953, -163.66970825195312, -28.24591064453125, -5.2468109130859375, -27.32574462890625, 369.1507568359375, 50.838165283203125, -97.1810531616211, -21.506702423095703, 56.757118225097656, 422.1111755371094, 560.7005004882812, 55.469207763671875, -3.4110679626464844, -135.2971954345703, 610.4974975585938, 9.254741668701172, -38.83583068847656, -38.272972106933594, 115.39637756347656, 424.89813232421875, 477.4187316894531, 436.46429443359375, 148.7314453125, 183.90444946289062, -9.757354736328125, 378.30877685546875, 225.48916625976562, 35.990379333496094, 396.45794677734375, -49.96436309814453, 278.6514587402344, 244.55709838867188, 170.28976440429688, 503.6106262207031, 1.8006820678710938, 294.5721130371094, 331.16998291015625, 173.62591552734375, -11.117992401123047, 161.88958740234375, 331.82061767578125, -72.27503967285156, 471.685302734375, 52.944244384765625, -199.82272338867188, 333.4975891113281, 192.01856994628906, 605.2623901367188, 97.53044128417969, 89.3541259765625, 352.776611328125, 200.4122772216797, 541.5347900390625, 26.61498260498047, 930.0481567382812, 316.2171936035156, 191.36570739746094, 264.6257629394531, 543.5520629882812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000521.npy"}
|
|
{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 176.17190551757812, "std": 222.69422912597656, "min": -391.66082763671875, "p10": -48.19780006408691, "median": 177.74136352539062, "p90": 496.4937957763672, "max": 688.8167114257812, "pos_frac": 0.8125, "sample": [250.75950622558594, 242.6663055419922, 382.732666015625, -33.83556365966797, 554.2554321289062, 34.352413177490234, 557.575927734375, 187.02972412109375, 53.37411880493164, -20.225879669189453, -25.210674285888672, 67.39989471435547, 428.546630859375, 26.49272918701172, 367.13775634765625, 171.0390625, 293.895263671875, -45.8561897277832, 262.7313232421875, 173.30398559570312, 333.2980041503906, 470.17449951171875, 194.32247924804688, 182.7036895751953, -205.75384521484375, 688.8167114257812, 70.02405548095703, 167.80699157714844, 417.55560302734375, 188.24362182617188, 91.99603271484375, 187.92994689941406, 71.70890808105469, 47.949066162109375, 301.42120361328125, -391.66082763671875, 206.796630859375, -35.67182159423828, 215.73355102539062, -120.5916748046875, -123.58659362792969, 243.33126831054688, 484.7445068359375, 20.608070373535156, 26.283771514892578, 361.1689453125, 367.3294372558594, 106.93701171875, -389.28839111328125, 526.70361328125, 182.17874145507812, 40.150787353515625, 365.7476806640625, 107.42472076416016, 82.58056640625, -119.64257049560547, -49.20134735107422, 30.58563995361328, 501.7406005859375, 197.2403106689453, 501.5292053222656, 84.45718383789062, 584.2489624023438, 130.7616424560547], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000522.npy"}
|
|
{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 198.3670654296875, "std": 228.6893768310547, "min": -559.266845703125, "p10": -49.19145202636718, "median": 205.6993408203125, "p90": 497.59558410644536, "max": 659.0677490234375, "pos_frac": 0.78125, "sample": [-33.90544128417969, 535.5668334960938, 196.6945343017578, 333.46600341796875, 345.026611328125, -130.8743438720703, 270.0107421875, 174.80734252929688, 408.8170166015625, 258.62359619140625, -214.99232482910156, 323.899658203125, 39.80784606933594, 275.34027099609375, -32.34193801879883, 19.370771408081055, 217.88494873046875, 513.6474609375, 237.04917907714844, 186.0709228515625, 214.7041473388672, 601.0826416015625, 173.97750854492188, -84.72789001464844, 218.0632781982422, 113.21199035644531, 184.62818908691406, 133.01150512695312, -25.06293487548828, 529.229248046875, 294.6903381347656, 50.87925720214844, 47.247528076171875, -559.266845703125, 316.6970520019531, 384.40704345703125, -173.68783569335938, -42.50398254394531, 315.5859375, 27.45429801940918, 351.5211486816406, 17.900880813598633, 659.0677490234375, 168.15170288085938, 317.0322265625, 71.71224975585938, 462.2483825683594, 193.19009399414062, 504.0450439453125, 151.70120239257812, 428.7698669433594, 155.16885375976562, 334.89776611328125, -29.21668243408203, 396.8472900390625, 418.875244140625, 240.44956970214844, -52.05751037597656, 390.60943603515625, 593.5808715820312, 482.5468444824219, -35.22743606567383, -159.99945068359375, -9.913204193115234], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000523.npy"}
|
|
{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 202.36312866210938, "std": 214.25408935546875, "min": -417.4350280761719, "p10": -64.52940979003904, "median": 239.68251037597656, "p90": 429.82325439453126, "max": 633.0423583984375, "pos_frac": 0.8125, "sample": [82.88743591308594, 633.0423583984375, 222.63864135742188, 180.73236083984375, 179.96072387695312, 244.21987915039062, 150.29989624023438, 9.773811340332031, 372.73358154296875, 284.6561279296875, 390.61614990234375, 33.193389892578125, 252.33993530273438, 463.292236328125, 377.1097717285156, 261.4814453125, 7.35076904296875, 254.5044708251953, 178.8123016357422, 375.4337463378906, -417.4350280761719, 223.03329467773438, 238.8714599609375, 561.7607421875, 21.13970947265625, -92.5162353515625, 277.0683898925781, 240.49356079101562, 392.7228088378906, -15.96188735961914, 101.39633178710938, 79.15229797363281, 289.315185546875, 54.01048278808594, 428.4457702636719, -8.091367721557617, 424.22265625, 613.943603515625, 405.564697265625, 262.77239990234375, 93.98331451416016, -207.53457641601562, -13.651947021484375, -92.85863494873047, 430.4136047363281, -36.60009765625, 325.8946533203125, 11.838325500488281, 335.2393798828125, 367.8250427246094, 517.4474487304688, 433.764404296875, 231.7863311767578, -303.1074523925781, -16.258262634277344, 112.28016662597656, 372.6035461425781, 416.02740478515625, -118.5353012084961, 253.07647705078125, 380.1927795410156, 223.2050323486328, 275.7197265625, -76.49911499023438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000524.npy"}
|
|
{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 250.02285766601562, "std": 226.04864501953125, "min": -159.88861083984375, "p10": -19.646487808227526, "median": 224.70864868164062, "p90": 611.3516540527345, "max": 781.3381958007812, "pos_frac": 0.875, "sample": [243.33602905273438, 579.749267578125, 242.90028381347656, 161.57009887695312, 108.38207244873047, 422.82305908203125, 526.56787109375, 204.2021484375, 547.6846923828125, 237.696533203125, 234.94143676757812, -132.577880859375, 298.05218505859375, 120.13697814941406, 166.42120361328125, -37.10343933105469, 134.6526641845703, 118.39230346679688, 640.6473388671875, 311.40191650390625, 388.61810302734375, 151.60540771484375, 2.528533935546875, 259.62091064453125, 42.5220947265625, 342.1738586425781, 289.80780029296875, 297.68560791015625, 240.84463500976562, 582.7896118164062, 414.270751953125, 43.358055114746094, 671.3107299804688, 88.74007415771484, 663.0172119140625, 148.90814208984375, 9.874900817871094, 191.71145629882812, 309.31201171875, -25.531768798828125, 781.3381958007812, -159.88861083984375, -104.1466064453125, -5.914165496826172, 123.24050903320312, 206.839599609375, 287.1678161621094, 51.38064193725586, 385.0789794921875, 201.12478637695312, 516.0521240234375, 653.4519653320312, -58.800323486328125, 113.3900146484375, 450.306396484375, 213.77645874023438, 214.47586059570312, 1.3689804077148438, -145.45843505859375, 368.4360656738281, 249.99301147460938, 623.592529296875, 152.9342803955078, 638.6790161132812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000525.npy"}
|
|
{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 245.55255126953125, "std": 253.18148803710938, "min": -297.7247009277344, "p10": -101.5063262939453, "median": 272.4573211669922, "p90": 561.4905883789064, "max": 874.485107421875, "pos_frac": 0.8125, "sample": [840.7549438476562, 498.6871337890625, 124.41468811035156, 112.41860961914062, 128.55517578125, 188.67556762695312, -34.52906036376953, -83.7796401977539, 350.49090576171875, -297.7247009277344, 356.5771484375, 390.69329833984375, 295.7073669433594, 203.31777954101562, 607.498291015625, 752.7423095703125, -27.367891311645508, 425.62152099609375, 196.13299560546875, 400.632568359375, 217.4522247314453, 268.1626892089844, -112.94432067871094, 573.1114501953125, 372.35125732421875, 276.751953125, 874.485107421875, 152.2406463623047, 308.94134521484375, -7.253377914428711, -152.30601501464844, 384.3533020019531, -131.93539428710938, 153.88427734375, -105.33551025390625, 9.616851806640625, 465.73944091796875, 366.4953918457031, 123.08697509765625, -224.1766357421875, 384.26104736328125, 303.61651611328125, 302.90911865234375, 434.333740234375, -92.57156372070312, 327.9964599609375, 534.375244140625, 439.51123046875, 53.939979553222656, 371.40594482421875, 72.8826904296875, 151.46681213378906, 117.22122192382812, 673.1707153320312, 288.4655456542969, 360.5469665527344, 488.0059814453125, 138.9612579345703, 320.76617431640625, 113.01600646972656, 10.405891418457031, 160.48558044433594, -118.15193176269531, 636.1021118164062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000526.npy"}
|
|
{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 193.1898651123047, "std": 212.88937377929688, "min": -138.50186157226562, "p10": -57.93189392089844, "median": 173.41248321533203, "p90": 477.83602294921883, "max": 775.0420532226562, "pos_frac": 0.78125, "sample": [23.567527770996094, 429.3490295410156, 357.5185241699219, 289.32769775390625, 352.7033386230469, 207.48678588867188, 775.0420532226562, -27.451704025268555, -138.50186157226562, 167.04513549804688, 33.62309265136719, -33.422149658203125, 545.8419799804688, 152.38284301757812, -58.05976104736328, -57.63353729248047, 208.740234375, -23.822181701660156, 117.68212127685547, -99.57415771484375, -136.1750030517578, 454.44207763671875, 183.16744995117188, 102.51031494140625, 125.54339599609375, 423.6698303222656, -12.401870727539062, -29.402244567871094, 252.04812622070312, 435.369140625, 263.7961120605469, 308.50738525390625, 290.0758056640625, 307.0551452636719, 693.8976440429688, 653.8303833007812, 561.5286865234375, -67.35816192626953, 7.8188934326171875, 328.2320251464844, 543.05810546875, 149.57464599609375, 487.86199951171875, 147.19898986816406, -106.38275146484375, 188.3975372314453, 264.1234130859375, 134.64657592773438, 155.264404296875, -41.98982238769531, 182.9329376220703, 91.43255615234375, 179.7798309326172, 290.04876708984375, 72.22555541992188, 83.51213073730469, 397.8130187988281, 261.6471862792969, 279.1009826660156, 195.5762481689453, 121.28848266601562, -118.4747085571289, 8.642583847045898, 28.872329711914062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000527.npy"}
|
|
{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 222.0224609375, "std": 284.4155578613281, "min": -281.1898193359375, "p10": -96.91662139892577, "median": 178.26702880859375, "p90": 625.7464965820313, "max": 1173.71826171875, "pos_frac": 0.78125, "sample": [47.996177673339844, 265.51776123046875, 427.9174499511719, 195.67642211914062, 104.63861083984375, -245.4576416015625, -183.10009765625, 534.1903076171875, 491.77276611328125, 400.6039123535156, 285.3343505859375, -46.82505798339844, 227.01913452148438, 118.00692749023438, 234.57696533203125, 91.9894790649414, 271.3271484375, -64.61907958984375, 762.3417358398438, 1173.71826171875, -23.47406768798828, 497.674072265625, 99.812744140625, 248.2513427734375, 163.33299255371094, 658.9202270507812, -185.79092407226562, 758.4410400390625, 3.0270462036132812, 332.7773742675781, -281.1898193359375, 70.73193359375, 236.95590209960938, 38.689456939697266, 317.8537902832031, 171.95790100097656, 131.81106567382812, 311.88958740234375, 356.50701904296875, 216.98123168945312, 46.971519470214844, 592.78369140625, -14.940643310546875, -140.44406127929688, 639.8734130859375, 86.92827606201172, 385.4632873535156, 780.0579223632812, 184.57615661621094, 320.1371765136719, -101.72200012207031, -29.804306030273438, 163.7992401123047, 563.9043579101562, -85.70407104492188, 653.8321533203125, 337.2589111328125, 57.100032806396484, -184.06092834472656, -3.0724334716796875, 97.68667602539062, 28.076549530029297, 540.6825561523438, 72.26848602294922], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000528.npy"}
|
|
{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 163.04525756835938, "std": 248.8297576904297, "min": -393.41522216796875, "p10": -167.35433807373045, "median": 161.88912963867188, "p90": 448.15671997070314, "max": 901.6338500976562, "pos_frac": 0.734375, "sample": [127.08558654785156, -98.47328186035156, 217.61065673828125, -80.0987548828125, 179.5423583984375, 165.5515899658203, 191.2554931640625, 370.63775634765625, 160.66610717773438, 901.6338500976562, 94.27989959716797, 430.88836669921875, 163.11215209960938, -46.13513946533203, 285.553466796875, -55.22254180908203, 124.51799774169922, 79.38387298583984, 309.5673828125, 88.68636322021484, -116.86653900146484, 197.27484130859375, 613.6397705078125, 444.5989990234375, 580.2440185546875, -393.41522216796875, -30.423316955566406, 380.42529296875, -130.7550811767578, 522.2835693359375, 242.48175048828125, 138.08111572265625, -18.009374618530273, 373.6841735839844, 24.62762451171875, 9.481000900268555, 149.7589111328125, 224.26817321777344, 449.68145751953125, 412.8589782714844, -323.6160888671875, 155.50369262695312, 320.4342346191406, -298.05828857421875, 388.81341552734375, -248.4492950439453, 270.8269348144531, 111.5992431640625, 574.2003173828125, 79.45780181884766, -25.868730545043945, 288.74078369140625, -22.95465660095215, 247.70298767089844, -186.8664093017578, 166.51943969726562, 37.554283142089844, 373.947265625, 469.70721435546875, 418.3087158203125, -183.03973388671875, 38.70362854003906, -196.67477416992188, 294.4417724609375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000529.npy"}
|
|
{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 258.61761474609375, "std": 297.67669677734375, "min": -725.6277465820312, "p10": -64.26182785034179, "median": 242.63784790039062, "p90": 648.8939819335938, "max": 889.9013061523438, "pos_frac": 0.828125, "sample": [406.7175598144531, 24.54085922241211, 5.2891845703125, 138.2940673828125, 257.8124694824219, -73.56258392333984, 661.2080078125, 461.6934509277344, 137.75839233398438, -182.51007080078125, 188.3696746826172, -146.4903564453125, 53.607666015625, 46.879364013671875, 505.99176025390625, 350.0880126953125, 53.32240295410156, 214.16226196289062, -58.75975799560547, 584.0856323242188, 571.009765625, 367.5267028808594, 736.4869995117188, 238.00918579101562, 92.27610778808594, 327.31982421875, -25.7408447265625, 427.4971618652344, -66.61985778808594, 556.076416015625, -4.689847946166992, 777.6068115234375, 802.5491943359375, 273.4200439453125, 389.0578918457031, 91.39863586425781, 652.837158203125, 183.49349975585938, -120.55450439453125, -167.6507110595703, 61.04173278808594, 630.3365478515625, 328.5361022949219, 889.9013061523438, -46.9833984375, 325.9436340332031, 445.1501770019531, 68.93743133544922, 801.2236328125, 138.82015991210938, 225.78773498535156, 1.6012630462646484, 604.9865112304688, -725.6277465820312, 639.6932373046875, 269.48333740234375, 72.96047973632812, 541.7322387695312, 342.1377258300781, 469.1793212890625, 73.20614624023438, 283.4072570800781, 247.26651000976562, 132.9974365234375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000530.npy"}
|
|
{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 218.40756225585938, "std": 245.41896057128906, "min": -360.0121765136719, "p10": -56.910176849365236, "median": 190.09939575195312, "p90": 600.0263732910157, "max": 792.2417602539062, "pos_frac": 0.8125, "sample": [329.1016540527344, 3.016012191772461, 360.1595764160156, 697.9680786132812, 280.57611083984375, -56.43721008300781, 210.9646453857422, 19.701637268066406, 792.2417602539062, -41.20294189453125, 38.83924865722656, 357.1484069824219, 7.67646598815918, 324.5862731933594, 222.55636596679688, 188.01158142089844, 197.25924682617188, 14.813461303710938, -78.18057250976562, 173.8280487060547, 760.2471313476562, 164.05555725097656, 430.53753662109375, 74.70501708984375, 378.0591735839844, 192.1872100830078, 70.61094665527344, 301.5189208984375, 270.27532958984375, -57.112876892089844, 628.6235961914062, 220.67034912109375, 306.37591552734375, 169.91220092773438, 65.15498352050781, 176.01467895507812, -9.479301452636719, -98.61209106445312, -10.972614288330078, 474.4263916015625, 608.6459350585938, 398.17254638671875, 72.43697357177734, 31.671035766601562, 668.9083251953125, -360.0121765136719, 171.63165283203125, 454.84259033203125, 433.99530029296875, 552.9546508789062, 579.9140625, 135.71099853515625, 642.99267578125, 66.83658599853516, 306.64898681640625, -27.943984985351562, 233.1392822265625, 73.36396789550781, 32.945045471191406, -101.65141296386719, 266.9721374511719, 410.4188537597656, -95.57317352294922, -128.7630615234375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000531.npy"}
|
|
{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 225.17822265625, "std": 312.8587951660156, "min": -454.74432373046875, "p10": -155.50132369995114, "median": 208.72317504882812, "p90": 636.8425903320312, "max": 1017.369140625, "pos_frac": 0.78125, "sample": [636.81396484375, 333.75921630859375, 595.0405883789062, -24.623512268066406, 796.1199340820312, 192.2388916015625, 557.1448974609375, 505.6811218261719, -449.3454284667969, 181.4849090576172, -212.18247985839844, 391.0164794921875, 197.6605987548828, 478.8451232910156, -68.19020080566406, 117.92971801757812, 446.9234924316406, -416.627197265625, 216.55345153808594, 177.13906860351562, 386.6385498046875, 884.6231079101562, 676.1165161132812, 646.4412231445312, 283.03302001953125, 320.7958679199219, 14.672409057617188, 721.0613403320312, 617.6908569335938, 58.70648193359375, 57.91185760498047, -74.66165924072266, -82.81210327148438, 604.1405029296875, 314.5087585449219, 86.22677612304688, 250.89041137695312, 636.8548583984375, 119.00411224365234, 293.8354187011719, 0.35106658935546875, 460.93829345703125, 135.0465087890625, 318.3427429199219, 311.6878662109375, 61.831748962402344, 41.89331817626953, 447.0633544921875, 41.904624938964844, -30.776039123535156, -119.87212371826172, -226.08273315429688, 83.91145324707031, 1017.369140625, 228.88743591308594, 218.23651123046875, -4.418111801147461, -454.74432373046875, 116.5645751953125, 242.40866088867188, -170.77098083496094, 209.8907470703125, -194.87445068359375, 207.55560302734375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000532.npy"}
|
|
{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 178.4113006591797, "std": 276.98980712890625, "min": -363.465576171875, "p10": -136.90944213867186, "median": 173.7098617553711, "p90": 551.5017150878907, "max": 920.7947998046875, "pos_frac": 0.703125, "sample": [-71.01516723632812, -236.0121307373047, 471.41650390625, -131.24996948242188, 5.8276214599609375, 206.05023193359375, 753.9509887695312, 447.8988037109375, 291.651123046875, 191.93463134765625, 542.388427734375, 480.0901184082031, 481.318115234375, 555.4074096679688, -125.67523956298828, 394.98944091796875, 487.17431640625, 237.94766235351562, 20.20309829711914, -156.93923950195312, 363.19097900390625, -18.117431640625, 98.03375244140625, 448.9686279296875, 130.96348571777344, 348.8465576171875, 57.656089782714844, -354.50244140625, 44.62814712524414, 920.7947998046875, 126.45703125, 327.4965515136719, -17.119964599609375, -78.6972885131836, 118.88491821289062, -363.465576171875, -52.56468963623047, 198.62831115722656, -72.29426574707031, 206.36959838867188, 36.62880325317383, 181.5936737060547, 574.6678466796875, 636.52001953125, -74.21273803710938, 177.1250457763672, -170.35580444335938, -4.9058837890625, 223.82720947265625, 72.52694702148438, 256.6243591308594, 39.6158447265625, 444.3852233886719, 96.20928192138672, -139.33493041992188, -119.87305450439453, -214.64244079589844, 193.6207275390625, 272.36163330078125, 652.1710205078125, -104.34618377685547, 170.294677734375, 656.3291625976562, 279.97808837890625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000533.npy"}
|
|
{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 204.6661376953125, "std": 250.50303649902344, "min": -526.2147216796875, "p10": -53.16997299194335, "median": 183.16095733642578, "p90": 528.6654296875, "max": 772.1870727539062, "pos_frac": 0.796875, "sample": [180.0245361328125, 456.3977355957031, -19.271499633789062, 772.1870727539062, 56.31601333618164, 214.48590087890625, -259.572265625, 258.7196960449219, 252.5371856689453, -65.2120590209961, 122.14424896240234, 721.4632568359375, -58.501686096191406, 89.13638305664062, 495.4615173339844, 689.9014282226562, 374.76434326171875, 10.748664855957031, 340.4134216308594, 533.11474609375, 185.5565643310547, -31.760005950927734, 187.98721313476562, 334.3956298828125, 290.0633850097656, 231.32005310058594, 708.2388916015625, 32.664371490478516, 106.36347961425781, 180.76535034179688, 80.5321273803711, 526.3270263671875, 105.97908020019531, -113.0621109008789, 390.4773864746094, -526.2147216796875, -25.275421142578125, 435.5702209472656, 529.6676025390625, 394.9620056152344, 120.46255493164062, 66.10191345214844, -33.20611572265625, 132.47305297851562, 395.05889892578125, 176.75868225097656, 210.3804168701172, 274.9136962890625, 379.65521240234375, 697.2173461914062, 363.08929443359375, -25.52181053161621, 278.3081970214844, 105.91526794433594, 207.69671630859375, 80.86932373046875, 161.64840698242188, 108.50232696533203, 277.4590148925781, -40.72930908203125, 304.24896240234375, -306.88165283203125, -110.53241729736328, 84.9290771484375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000534.npy"}
|
|
{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 240.72836303710938, "std": 252.28305053710938, "min": -299.8365478515625, "p10": -50.294966888427716, "median": 192.3750457763672, "p90": 558.8560668945314, "max": 955.32568359375, "pos_frac": 0.8125, "sample": [-143.9027557373047, 955.32568359375, 61.13318634033203, -13.75164794921875, -112.00813293457031, 158.11688232421875, 87.006103515625, 500.7212219238281, 77.05096435546875, 575.4193115234375, 187.1576690673828, 286.88079833984375, 446.8549499511719, 736.8018798828125, 318.2065124511719, 231.3050079345703, -29.20696258544922, 736.7816772460938, 185.93179321289062, 264.91925048828125, 615.6018676757812, 336.95355224609375, 197.404541015625, 385.2497253417969, 170.85560607910156, 465.27093505859375, 377.85198974609375, 358.9803161621094, -13.886838912963867, 171.78173828125, 430.85498046875, 399.4951171875, 439.27197265625, 48.97904586791992, -34.642234802246094, -299.8365478515625, 445.0332336425781, 409.60986328125, 143.24465942382812, 213.48980712890625, 343.16339111328125, 696.103271484375, 72.3584213256836, 187.34555053710938, 280.4844665527344, -9.308456420898438, 111.70369720458984, 520.20849609375, -57.00328063964844, -115.9757080078125, 440.23583984375, 62.035362243652344, 296.30377197265625, 78.26726531982422, -91.70611572265625, -223.07879638671875, 651.571044921875, 66.27755737304688, 499.3836975097656, 130.6141357421875, 21.757837295532227, 381.7890319824219, 105.43878173828125, 186.33953857421875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000535.npy"}
|
|
{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 296.9447021484375, "std": 305.89300537109375, "min": -256.048583984375, "p10": 4.47441444396974, "median": 231.87681579589844, "p90": 672.7291931152344, "max": 1624.518310546875, "pos_frac": 0.890625, "sample": [155.8855743408203, 137.5102996826172, 249.15467834472656, 418.62677001953125, 944.3757934570312, 84.82322692871094, 194.49380493164062, 37.71356201171875, 26.59859848022461, -1.4912567138671875, 215.35671997070312, 245.66104125976562, 608.4039916992188, 700.5557250976562, 65.17411804199219, 454.8223876953125, 757.3042602539062, 96.74557495117188, 646.3165283203125, 469.8858642578125, 668.04345703125, 388.8880615234375, -159.80068969726562, 381.5469055175781, 268.0482482910156, 756.4229736328125, 23.87987518310547, 450.9345703125, 481.9697265625, 90.06982421875, 1624.518310546875, 568.502197265625, 257.9798583984375, 247.97763061523438, 387.35009765625, 262.85028076171875, 166.33982849121094, 674.7373657226562, 18.39431381225586, -23.04391860961914, 165.3993377685547, -242.88148498535156, 645.5513916015625, 218.09259033203125, 274.15087890625, -126.24125671386719, 126.79034423828125, 141.3651123046875, 257.0162353515625, 130.4320068359375, 210.55419921875, 93.8919906616211, 179.96359252929688, 175.100341796875, 202.83944702148438, 595.099609375, 154.63197326660156, -9.564411163330078, -256.048583984375, 138.9517822265625, 431.07861328125, 449.3508605957031, 293.5657043457031, 711.8433837890625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000536.npy"}
|
|
{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 228.16317749023438, "std": 278.6157531738281, "min": -432.7487487792969, "p10": -53.63830871582028, "median": 208.78282165527344, "p90": 596.9721191406251, "max": 803.6276245117188, "pos_frac": 0.859375, "sample": [-432.7487487792969, -282.4379577636719, 1.8800277709960938, 73.84168243408203, -66.72383117675781, 123.23152160644531, 46.57612609863281, 654.2306518554688, 23.469314575195312, 27.696041107177734, 389.24273681640625, -23.105422973632812, 391.3598327636719, 58.6713752746582, -179.73924255371094, 524.856689453125, 680.550537109375, -89.00196838378906, 304.8399963378906, 221.34405517578125, 73.68353271484375, 519.5985107421875, 60.616798400878906, -269.20709228515625, 347.68658447265625, 3.5070648193359375, 770.934326171875, 535.374267578125, 208.65914916992188, 308.361572265625, 744.9068603515625, 137.610595703125, 567.1321411132812, 338.21722412109375, 257.46575927734375, 211.99142456054688, 93.71673583984375, 436.0692138671875, 208.906494140625, 9.676101684570312, 22.079132080078125, 399.96380615234375, -4.6013641357421875, -362.81707763671875, 141.8043670654297, 543.3292236328125, 54.484920501708984, 211.55397033691406, 225.41018676757812, 609.7606811523438, 12.328506469726562, 525.0826416015625, 29.279563903808594, 355.2489013671875, 522.6179809570312, 60.14336395263672, 803.6276245117188, 105.35894775390625, 522.0075073242188, 317.22027587890625, 202.6830596923828, 628.2020263671875, 179.315673828125, 485.4193115234375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000537.npy"}
|
|
{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 242.5530242919922, "std": 253.90835571289062, "min": -317.93817138671875, "p10": -41.343035507202146, "median": 261.0781936645508, "p90": 570.65075378418, "max": 844.0223999023438, "pos_frac": 0.796875, "sample": [54.17720031738281, 356.8282470703125, 459.4228515625, 605.6046752929688, 504.36767578125, -156.2183837890625, 619.6324462890625, 165.72488403320312, 270.5694580078125, 76.81391143798828, -32.56255340576172, 497.2385559082031, 461.954833984375, 358.57489013671875, 109.98353576660156, 496.77618408203125, 336.019287109375, -1.0741043090820312, 253.92701721191406, 688.6124267578125, 275.7989501953125, 122.63278198242188, 28.665496826171875, 451.16064453125, 100.90707397460938, 187.88975524902344, 120.0394287109375, 831.6767578125, -4.94769287109375, -79.13763427734375, -317.93817138671875, 446.5528564453125, 97.91111755371094, -90.75291442871094, 844.0223999023438, 283.1507568359375, -39.05649185180664, 291.37640380859375, 226.8861846923828, 507.9573669433594, 114.8494873046875, -123.17395782470703, 14.58291244506836, 499.44744873046875, 490.2542419433594, 480.232666015625, -21.84621810913086, 281.5573425292969, -69.51425170898438, 122.68991088867188, 277.759033203125, 293.95977783203125, 303.4541320800781, 295.2054748535156, 16.95534324645996, 88.91669464111328, 268.2293701171875, 102.92842102050781, 754.269287109375, 597.5193481445312, -42.32298278808594, -37.04491424560547, 375.4626770019531, 27.85371971130371], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000538.npy"}
|
|
{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 198.35647583007812, "std": 244.5079345703125, "min": -240.77755737304688, "p10": -111.29121246337887, "median": 203.00382232666016, "p90": 512.0720672607422, "max": 766.5650634765625, "pos_frac": 0.75, "sample": [208.62771606445312, 292.87139892578125, 67.2162094116211, 23.494970321655273, 213.84378051757812, 366.2093200683594, -154.5695037841797, -70.37517547607422, 0.595733642578125, 528.2785034179688, -168.32928466796875, -16.431344985961914, 399.58685302734375, 200.0697784423828, 470.61029052734375, 474.0829162597656, 595.3512573242188, 209.74241638183594, 7.0306396484375, 433.04644775390625, 766.5650634765625, 260.2318115234375, 416.5, 510.3866882324219, 107.82207489013672, 15.385562896728516, -11.625381469726562, 512.7943725585938, 130.03659057617188, 26.241416931152344, -240.77755737304688, 261.2626953125, -212.607177734375, 550.896728515625, 240.37741088867188, -11.66265869140625, 18.857948303222656, 436.90924072265625, 62.102909088134766, -71.04946899414062, -128.5376739501953, 668.7219848632812, -170.35586547851562, 262.4012145996094, 91.12916564941406, -64.83297729492188, 489.52691650390625, -48.28257751464844, 275.1674499511719, -162.52151489257812, 205.9378662109375, -9.785118103027344, 647.790771484375, 337.2895812988281, 206.63323974609375, 174.8179473876953, 469.07147216796875, 350.4527587890625, 167.729248046875, 422.6878662109375, -53.79426574707031, 424.0068359375, 195.2034149169922, 94.75517272949219], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000539.npy"}
|
|
{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 292.78631591796875, "std": 298.9877624511719, "min": -284.99566650390625, "p10": -101.37917327880855, "median": 274.6304931640625, "p90": 736.1140319824223, "max": 1029.152587890625, "pos_frac": 0.84375, "sample": [371.85931396484375, 355.2821960449219, 561.8959350585938, 63.16828918457031, 1018.9138793945312, 73.62289428710938, 210.63656616210938, -6.023193359375, -284.99566650390625, -226.174560546875, -156.8939208984375, 384.8335876464844, 171.5055694580078, 394.013427734375, 1029.152587890625, -15.376594543457031, 203.00827026367188, 145.19326782226562, 374.6267395019531, 439.2278137207031, -63.182891845703125, 75.72618865966797, 213.62374877929688, 360.8497314453125, 420.19232177734375, 549.0717163085938, 390.8832702636719, 834.5492553710938, 544.2349853515625, 231.52044677734375, 291.4062194824219, 268.56854248046875, 305.27362060546875, 142.04354858398438, 210.27999877929688, 404.2162780761719, 76.11166381835938, 1.80621337890625, 488.9756774902344, 206.60223388671875, 543.505615234375, 911.1071166992188, 837.7327880859375, 807.9908447265625, -163.65774536132812, 644.8145751953125, 141.35406494140625, 280.69244384765625, 315.4415283203125, 310.6866455078125, -117.74900817871094, 170.40286254882812, 480.6275634765625, 425.1955871582031, 175.93060302734375, 256.2666015625, 580.77587890625, 161.84230041503906, 775.2423706054688, 2.3830490112304688, -121.88530731201172, 408.6645202636719, 59.45451354980469, -208.72665405273438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000540.npy"}
|
|
{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 183.6807861328125, "std": 271.8061218261719, "min": -339.03289794921875, "p10": -145.98566436767578, "median": 168.5234832763672, "p90": 530.6262939453126, "max": 1040.1217041015625, "pos_frac": 0.734375, "sample": [-243.09237670898438, 164.996826171875, 333.0706481933594, 149.73538208007812, 102.62325286865234, 117.53378295898438, 232.53512573242188, 86.38032531738281, -4.079010009765625, -76.84393310546875, -105.50473022460938, 813.322021484375, -148.46424865722656, 74.78565979003906, 297.8744201660156, 24.96308135986328, 223.2867431640625, 881.360595703125, 102.7217025756836, 277.0075378417969, 1040.1217041015625, 394.5867614746094, 177.93264770507812, -153.4844207763672, 172.05014038085938, -186.39031982421875, 151.75173950195312, 158.38613891601562, -339.03289794921875, 515.3148193359375, 315.5182189941406, 54.05284881591797, 411.9713439941406, 127.44468688964844, -16.7740478515625, 148.09780883789062, 259.1039123535156, 190.28468322753906, 454.71697998046875, 192.8365020751953, -85.55018615722656, 234.0835418701172, 381.4735412597656, 573.8124389648438, 537.1883544921875, 246.38311767578125, -112.83134460449219, -208.17169189453125, 415.74017333984375, 411.4645690917969, -93.86087799072266, 201.90261840820312, 320.6511535644531, 695.886962890625, 546.4553833007812, 3.8986549377441406, -179.66456604003906, 129.53189086914062, -140.20230102539062, -106.86446380615234, -7.101493835449219, 202.73434448242188, 234.14053344726562, 181.76841735839844], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000541.npy"}
|
|
{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 241.8713836669922, "std": 274.4323425292969, "min": -368.9163818359375, "p10": -79.79032897949217, "median": 196.56881713867188, "p90": 613.2061218261719, "max": 962.9525146484375, "pos_frac": 0.8125, "sample": [147.20399475097656, 151.43634033203125, 204.23074340820312, 109.36539459228516, -106.99193572998047, 703.3947143554688, -148.50299072265625, 962.9525146484375, 564.8955688476562, 239.09600830078125, 116.13761901855469, 409.2254638671875, 450.7582702636719, -84.00143432617188, 70.66800689697266, 365.9061279296875, 643.6494750976562, 341.4437255859375, -40.672264099121094, 781.7078857421875, 161.06396484375, 252.31468200683594, 283.69464111328125, 74.24114990234375, -189.72634887695312, 182.81980895996094, 7.54473876953125, -26.47087860107422, -69.96441650390625, 634.676025390625, -58.60680389404297, 533.1455078125, 339.741455078125, 76.83160400390625, 651.760009765625, 33.04018783569336, 451.66583251953125, 155.97604370117188, -172.77304077148438, 408.5809326171875, 334.09600830078125, 304.1253356933594, 177.5619354248047, -66.10053253173828, 599.365234375, 207.50051879882812, 171.1187286376953, -362.89239501953125, 527.2687377929688, 104.97289276123047, 159.291259765625, 329.5616149902344, 63.12809753417969, 356.43743896484375, 616.2477416992188, 387.60552978515625, 109.86261749267578, -368.9163818359375, 563.2172241210938, 188.90689086914062, 122.8319320678711, 321.2607116699219, 415.7500915527344, 606.1090087890625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000542.npy"}
|
|
{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 180.1173095703125, "std": 288.71630859375, "min": -448.81927490234375, "p10": -100.7624755859375, "median": 101.21765518188477, "p90": 551.1015563964844, "max": 972.77978515625, "pos_frac": 0.78125, "sample": [19.29010009765625, -94.572021484375, 279.8548583984375, 0.10284423828125, 129.66632080078125, 895.6651611328125, 5.066667556762695, 12.964218139648438, -448.81927490234375, 425.9051208496094, 4.672386169433594, -273.1855163574219, 44.06594467163086, 218.99017333984375, 810.7669677734375, 88.49217224121094, 43.80564880371094, 826.8862915039062, -293.3507995605469, 13.330802917480469, 355.93414306640625, 45.00159454345703, 24.88442611694336, 29.060895919799805, -159.17129516601562, 113.9431381225586, -103.41552734375, 432.25537109375, 135.92218017578125, 25.353271484375, 479.44573974609375, 142.8269500732422, -252.57965087890625, 610.5731201171875, -54.311248779296875, 69.75456237792969, 972.77978515625, 468.6029968261719, 542.907958984375, 34.77330780029297, 554.6130981445312, -40.71757507324219, -81.51683044433594, 159.69203186035156, 537.356201171875, 56.75872039794922, 363.6658020019531, 583.249267578125, -117.03904724121094, 376.80352783203125, 213.05795288085938, -28.27642059326172, 163.13844299316406, -40.882896423339844, 312.23065185546875, 431.32525634765625, 235.32662963867188, 62.364952087402344, 58.66175842285156, 249.5731201171875, -22.027626037597656, 204.78228759765625, 280.9092102050781, 390.31951904296875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000543.npy"}
|
|
{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 256.15838623046875, "std": 255.20523071289062, "min": -253.7135009765625, "p10": -0.3737176895141552, "median": 215.5497283935547, "p90": 575.4139648437501, "max": 1177.313720703125, "pos_frac": 0.890625, "sample": [451.12738037109375, 244.93536376953125, 291.32940673828125, 588.6219482421875, 1177.313720703125, -236.10971069335938, 345.68951416015625, -2.4238815307617188, 395.9535217285156, -27.190095901489258, 842.580078125, 48.59059143066406, 327.8607482910156, 192.9065704345703, 31.283546447753906, 374.2760314941406, 267.58148193359375, 209.57901000976562, 171.1982421875, 336.8326416015625, 294.1339111328125, 285.64892578125, 60.598289489746094, 174.62843322753906, 192.19931030273438, 184.3469696044922, 666.8350830078125, 105.89862060546875, -129.82777404785156, 4.409997940063477, 484.760009765625, 221.52044677734375, 60.48651885986328, 445.8389587402344, -192.89707946777344, 195.7971649169922, 384.1005859375, 182.3638458251953, 270.0912780761719, -21.14900016784668, 278.0252990722656, 180.55233764648438, 202.166015625, 544.5953369140625, 383.374755859375, 113.60722351074219, 338.40875244140625, 744.308349609375, 356.5867919921875, 74.52557373046875, 7.376430511474609, 125.48090362548828, 390.75653076171875, 751.50732421875, 407.4311828613281, 8.070232391357422, 322.9862976074219, 15.004837036132812, 449.2488708496094, 123.68766784667969, 171.5966339111328, 629.3089599609375, -253.7135009765625, 127.52413177490234], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000544.npy"}
|
|
{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 245.11962890625, "std": 263.60601806640625, "min": -300.8670959472656, "p10": -95.8049507141113, "median": 247.42096710205078, "p90": 628.8767150878907, "max": 813.4127197265625, "pos_frac": 0.78125, "sample": [34.43937683105469, -35.989776611328125, -300.8670959472656, -16.02392578125, 132.68141174316406, 392.1752014160156, 352.0421142578125, 26.115129470825195, 317.4558410644531, 31.917823791503906, 426.9936828613281, 666.5421142578125, -221.8082275390625, 426.00970458984375, 599.42822265625, -15.646842956542969, 527.1165771484375, 468.9256286621094, 677.5213012695312, 254.20408630371094, 413.1820068359375, 200.78012084960938, 318.3419189453125, 144.8347930908203, -2.4161758422851562, 92.14352416992188, -122.8270263671875, 268.35321044921875, 449.7065124511719, -4.953393936157227, 670.3552856445312, 146.2770538330078, 177.6908721923828, -108.03361511230469, 728.0736083984375, 426.67041015625, 90.89790344238281, 685.133544921875, -120.39250183105469, 269.93182373046875, -9.390586853027344, 441.5307312011719, 144.64700317382812, 336.10687255859375, 197.7838134765625, 431.77191162109375, 423.0799560546875, 813.4127197265625, 240.63784790039062, 254.82708740234375, -67.27140045166016, 60.010528564453125, 4.453926086425781, 221.3653106689453, 344.4032287597656, 576.7264404296875, 641.4974975585938, -131.20083618164062, 281.97747802734375, 121.32537078857422, 422.7287902832031, -223.69082641601562, 99.91626739501953, 594.0250244140625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000545.npy"}
|
|
{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 211.33428955078125, "std": 223.456787109375, "min": -417.69036865234375, "p10": -41.79097824096679, "median": 225.61420440673828, "p90": 466.4637481689453, "max": 708.8922729492188, "pos_frac": 0.875, "sample": [-96.32638549804688, 43.24469757080078, 394.703125, 94.99917602539062, 301.2330322265625, 316.798583984375, 111.78845977783203, 222.16726684570312, 194.4204864501953, 148.65325927734375, -45.13282012939453, -116.10574340820312, 29.118453979492188, 69.3341064453125, 285.7342529296875, 39.62739181518555, 48.53014373779297, 338.9877014160156, 467.1176452636719, 90.2724609375, 13.656471252441406, 568.8673095703125, -183.85952758789062, 407.6651916503906, 362.0990295410156, 338.064697265625, 239.22740173339844, 293.68701171875, 454.07818603515625, 205.13461303710938, 446.082763671875, -235.13966369628906, 136.24948120117188, 217.53463745117188, 402.3843994140625, -346.37225341796875, 380.5103759765625, 312.939697265625, 479.425537109375, 708.8922729492188, -417.69036865234375, 464.93798828125, 362.4313049316406, 115.70703125, 339.0180969238281, 653.7786254882812, -33.99334716796875, 383.82916259765625, 98.37215423583984, 229.06114196777344, 108.53162384033203, 281.00384521484375, 157.31088256835938, 287.7578430175781, 11.044290542602539, 476.7685852050781, 97.91058349609375, 361.9559326171875, 0.28569793701171875, 175.81820678710938, 27.62230110168457, 370.6711120605469, 351.3086242675781, 481.6598815917969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000546.npy"}
|
|
{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 212.7323760986328, "std": 275.70928955078125, "min": -656.8561401367188, "p10": -68.51444931030272, "median": 167.9921875, "p90": 513.674951171875, "max": 949.7870483398438, "pos_frac": 0.78125, "sample": [51.98919677734375, 255.921875, -656.8561401367188, 642.48095703125, 46.23048782348633, -70.35174560546875, 103.1314468383789, 119.57714080810547, -17.60693359375, 494.0993347167969, 130.16635131835938, 783.0106811523438, 97.88128662109375, 949.7870483398438, -34.79756164550781, 29.03874969482422, 433.63580322265625, 91.08454132080078, -148.93052673339844, 470.44732666015625, 40.90766143798828, 99.68500518798828, 389.0523376464844, 167.39797973632812, 418.0899353027344, 103.7371826171875, 393.6493225097656, 299.9658203125, 192.6237335205078, 232.47811889648438, 580.01171875, 108.90693664550781, 257.0633239746094, 497.6990051269531, 296.9780578613281, 134.84616088867188, -26.683155059814453, 312.83380126953125, 278.4685974121094, -73.3333740234375, 549.3890380859375, 905.9286499023438, 258.134033203125, -154.0705108642578, -39.653167724609375, -64.22742462158203, -252.13926696777344, 418.0722351074219, 90.69438171386719, 262.26422119140625, 503.3802490234375, 34.96183776855469, 218.5845947265625, 316.83331298828125, 122.26644897460938, 113.02638244628906, 492.099853515625, -4.3035888671875, 374.73486328125, -41.657470703125, 508.09716796875, 516.0654296875, 168.58639526367188, -156.51312255859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000547.npy"}
|
|
{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 220.9075469970703, "std": 292.2630310058594, "min": -529.440185546875, "p10": -81.8370246887207, "median": 190.6605987548828, "p90": 505.24591674804697, "max": 1334.3668212890625, "pos_frac": 0.796875, "sample": [84.11878967285156, 357.78997802734375, 89.7442398071289, 267.4267883300781, -67.27741241455078, 52.647071838378906, -529.440185546875, 917.8369140625, 321.8299560546875, 256.0027770996094, 336.1671447753906, 343.32830810546875, 477.40045166015625, 543.1235961914062, 349.5796813964844, -140.808837890625, -2.4714527130126953, 396.0585632324219, 471.3310852050781, 37.672157287597656, 555.3456420898438, 307.0313415527344, 174.3671875, 92.92499542236328, 466.8493347167969, -66.88056182861328, 306.48089599609375, 177.6767578125, 294.8084411621094, 211.1990509033203, -151.58139038085938, 1334.3668212890625, 444.4501953125, 143.75616455078125, 152.73602294921875, -79.72533416748047, -47.75218200683594, 465.8795166015625, -150.63876342773438, 813.1329345703125, 91.68624877929688, -76.98526000976562, -124.19355773925781, 238.20236206054688, 14.294492721557617, 21.425811767578125, 216.3666229248047, 102.22021484375, 280.1148681640625, -82.74203491210938, 312.94580078125, 298.5596008300781, -95.76425170898438, 966.7986450195312, 240.78628540039062, 148.59161376953125, 97.04911804199219, 136.46163940429688, 163.3125457763672, 203.64443969726562, 142.00314331054688, 315.5321960449219, 4.106498718261719, 517.1796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000548.npy"}
|
|
{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 194.37591552734375, "std": 256.4853820800781, "min": -495.82672119140625, "p10": -68.34129638671875, "median": 149.6064682006836, "p90": 543.5590209960939, "max": 922.4891967773438, "pos_frac": 0.828125, "sample": [196.0888671875, 631.987548828125, 129.18580627441406, 99.2964096069336, 130.64796447753906, 334.98431396484375, -54.85078430175781, 329.13140869140625, 136.68780517578125, 314.0793762207031, 26.991016387939453, 424.99139404296875, -26.327903747558594, 206.67608642578125, 47.8783073425293, -220.37242126464844, 564.9962158203125, -142.42205810546875, -140.19760131835938, 29.900901794433594, 324.8686218261719, 261.0733337402344, -70.03516387939453, 239.7470703125, 266.870849609375, 150.67347717285156, 194.95541381835938, 145.3602294921875, 326.4515686035156, 503.89862060546875, 58.57110595703125, 119.04525756835938, 272.6846618652344, 12.377937316894531, 285.3462219238281, -242.895751953125, 173.04031372070312, 568.8099365234375, 118.26094818115234, 4.074909210205078, -159.11343383789062, -12.327690124511719, 318.3148498535156, 223.51988220214844, 922.4891967773438, 75.84025573730469, 70.89308166503906, 360.1868591308594, 115.181640625, 886.9512939453125, 560.5563354492188, -495.82672119140625, 5.58534049987793, 432.4302978515625, 301.601806640625, -64.3889389038086, 148.53945922851562, 66.692626953125, 367.99810791015625, 785.5364379882812, 72.91398620605469, 349.1463623046875, 57.53211975097656, 317.2727966308594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000549.npy"}
|
|
{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 190.26931762695312, "std": 215.7088165283203, "min": -244.71759033203125, "p10": -77.32337341308593, "median": 193.34355926513672, "p90": 427.67787170410156, "max": 705.9288940429688, "pos_frac": 0.8125, "sample": [171.80917358398438, 92.96133422851562, 579.08447265625, 151.36019897460938, 275.5218811035156, 426.19927978515625, 366.356201171875, 29.532913208007812, 29.678478240966797, -57.87416076660156, 229.06417846679688, 248.75750732421875, 390.5501403808594, 247.90997314453125, 131.07374572753906, 589.29248046875, 127.8847885131836, -65.38282775878906, 68.9197998046875, 377.1523132324219, -244.71759033203125, -25.66107177734375, 354.11077880859375, 322.81610107421875, -141.85220336914062, 479.881103515625, 330.3858642578125, 131.87283325195312, 218.75454711914062, 39.94682312011719, 25.25958251953125, -214.1815185546875, 269.81903076171875, -64.25142669677734, 121.57697296142578, 705.9288940429688, 370.4754638671875, 302.86578369140625, 638.031005859375, 307.4034118652344, 268.65460205078125, 92.58763122558594, 14.898731231689453, -25.00189208984375, 404.29534912109375, 199.50518798828125, 361.14398193359375, 321.0302734375, 151.10867309570312, 312.006103515625, 196.08926391601562, 536.8305053710938, -82.44075012207031, 379.35211181640625, -243.96304321289062, 77.70414733886719, 180.50010681152344, 428.3115539550781, 214.53118896484375, -159.45016479492188, 190.5978546142578, -159.3944091796875, 163.12844848632812, 16.894638061523438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000550.npy"}
|
|
{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 289.88714599609375, "std": 249.9429473876953, "min": -268.72039794921875, "p10": -22.0596710205078, "median": 293.4209289550781, "p90": 585.746484375, "max": 802.7708740234375, "pos_frac": 0.859375, "sample": [20.876224517822266, -10.13824462890625, 289.1367492675781, 276.94793701171875, 379.36370849609375, 462.0092468261719, -171.7373046875, 38.0860481262207, 710.5216064453125, 250.46592712402344, -6.149574279785156, 159.23814392089844, -53.79304504394531, 480.78387451171875, 541.1742553710938, 557.9473876953125, 448.04168701171875, 239.6524658203125, 295.86505126953125, 802.7708740234375, 588.52734375, 290.976806640625, 560.87548828125, 579.2578125, 353.49530029296875, 137.1099395751953, 659.1275634765625, 209.6206817626953, 95.36786651611328, 319.9372863769531, 151.3685760498047, 10.50844955444336, 547.05126953125, 110.20364379882812, 309.560302734375, 367.3993225097656, 361.62652587890625, 498.445556640625, 398.90911865234375, 62.81443786621094, -184.19186401367188, 494.881591796875, 476.5159912109375, 313.8165588378906, -138.16539001464844, 762.3260498046875, -27.168853759765625, -268.72039794921875, 668.954345703125, 47.350746154785156, -36.108665466308594, 130.60110473632812, 394.2272644042969, 563.4213256835938, 288.10015869140625, 53.09910583496094, 532.3734130859375, 371.9034729003906, 241.21969604492188, 335.43048095703125, 211.1098175048828, 704.4944458007812, 183.78924560546875, 110.27042388916016], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000551.npy"}
|
|
{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 319.620849609375, "std": 314.95965576171875, "min": -236.82073974609375, "p10": -1.574386978149402, "median": 263.52679443359375, "p90": 789.092431640625, "max": 1210.377197265625, "pos_frac": 0.890625, "sample": [175.8029022216797, 160.5045166015625, 231.6033935546875, 968.27978515625, 263.4511413574219, 796.8678588867188, 506.59173583984375, 350.0115051269531, 24.64556884765625, 219.43743896484375, 429.60919189453125, 413.0370788574219, 86.64826965332031, 313.34320068359375, 108.75840759277344, 456.08819580078125, 200.715087890625, 653.5526123046875, 952.2656860351562, 33.10809326171875, 152.7606201171875, 405.803955078125, 371.753173828125, 263.6024475097656, -236.82073974609375, 22.338741302490234, 32.872947692871094, 431.3135681152344, -69.92347717285156, -204.4185791015625, 760.295166015625, -171.3303680419922, 168.1809539794922, 58.914878845214844, 289.063232421875, 229.15272521972656, 396.94049072265625, 1051.124755859375, 978.7225952148438, 667.492919921875, 373.66607666015625, 10.613044738769531, 275.52081298828125, 504.1600036621094, 560.7401123046875, 1210.377197265625, 248.48199462890625, -12.055404663085938, 849.933349609375, 770.9497680664062, 246.20079040527344, 27.435386657714844, 141.98056030273438, -6.744899749755859, 10.490142822265625, 245.55169677734375, -34.06440734863281, 182.65260314941406, 265.05609130859375, 326.4719543457031, 451.9853515625, 433.359619140625, 384.7291259765625, 46.08354949951172], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000552.npy"}
|
|
{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 204.2652587890625, "std": 266.97454833984375, "min": -278.9571533203125, "p10": -120.18477020263671, "median": 181.20721435546875, "p90": 576.392724609375, "max": 1201.72607421875, "pos_frac": 0.796875, "sample": [1201.72607421875, 674.7041015625, -54.83099365234375, 229.55252075195312, 335.19970703125, -167.45767211914062, 43.27046203613281, -32.22437286376953, 107.33094787597656, 253.92901611328125, 94.70632934570312, 173.23695373535156, 128.9571533203125, 210.59347534179688, 65.20879364013672, 285.5567626953125, -121.767822265625, 17.675933837890625, 263.0433654785156, -116.49098205566406, 274.8043518066406, 176.82427978515625, 301.4310607910156, -88.89508819580078, 378.8739318847656, 28.515670776367188, 607.1779174804688, -3.03302001953125, 27.99433135986328, 366.31085205078125, -158.94703674316406, 286.42181396484375, 123.3564453125, -174.9723358154297, 216.25161743164062, 168.76239013671875, 158.39544677734375, 185.0731201171875, 541.177001953125, 11.141944885253906, 187.9840087890625, -68.36106872558594, 365.6831359863281, 261.1195983886719, 7.355674743652344, 96.8480224609375, -167.0544891357422, 259.15704345703125, 492.55169677734375, 105.8775863647461, 581.303466796875, 226.42587280273438, 602.013916015625, 317.0137939453125, 652.8419799804688, 564.934326171875, 7.739692687988281, 382.9765319824219, -210.65060424804688, 756.0477294921875, -278.9571533203125, 382.5571594238281, 177.34130859375, 351.6436767578125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000553.npy"}
|
|
{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 271.5777587890625, "std": 280.29656982421875, "min": -407.8146667480469, "p10": -107.17077255249022, "median": 260.02171325683594, "p90": 669.3835205078126, "max": 946.29052734375, "pos_frac": 0.828125, "sample": [160.43821716308594, -89.51789855957031, 260.6663818359375, 157.2747802734375, 411.5799255371094, 183.44644165039062, 370.4909973144531, 372.180419921875, 174.6551513671875, 325.34625244140625, 604.8193359375, 233.133056640625, 423.9586486816406, 280.81781005859375, -122.06465148925781, 423.4235534667969, 539.0165405273438, 213.71932983398438, 321.362060546875, 162.53729248046875, -114.73628997802734, -258.4983215332031, -5.192289352416992, 29.370994567871094, 946.29052734375, -20.99958038330078, 248.32083129882812, 389.3893737792969, 219.81756591796875, 362.0703430175781, 324.2223205566406, 730.2554931640625, 838.159423828125, 129.0642852783203, 574.319580078125, 251.45999145507812, 757.5826416015625, 126.84225463867188, 658.5828247070312, -121.03269958496094, 213.2132568359375, 329.39373779296875, 355.5550537109375, 466.5166931152344, 464.9795837402344, 34.8040657043457, 284.11407470703125, 12.8182373046875, 302.081787109375, 285.345947265625, -172.12841796875, 259.3770446777344, 253.51510620117188, -407.8146667480469, 674.0123901367188, 54.91845703125, -190.59286499023438, 92.05292510986328, 511.82122802734375, 435.1387939453125, 834.9586181640625, 743.1610107421875, 97.4298324584961, -26.269107818603516], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000554.npy"}
|
|
{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 273.369873046875, "std": 322.7062683105469, "min": -429.8540954589844, "p10": -67.28526153564448, "median": 195.4525375366211, "p90": 770.991033935547, "max": 1047.426513671875, "pos_frac": 0.8125, "sample": [352.98876953125, 288.12164306640625, 365.6274108886719, 420.2784118652344, 457.34124755859375, 676.103759765625, 205.38096618652344, 329.7148742675781, -16.18841552734375, 135.989990234375, -111.13035583496094, 660.9662475585938, 606.7559814453125, 313.79278564453125, 65.95443725585938, -17.254209518432617, 138.99295043945312, -16.283294677734375, 1041.8692626953125, 781.3283081054688, 150.97149658203125, 839.8792114257812, 200.42715454101562, 535.0722045898438, 342.8710632324219, 165.28128051757812, 490.3489990234375, 42.55940246582031, 178.57681274414062, 14.048324584960938, 275.045166015625, 305.74652099609375, 104.55155181884766, 277.1186218261719, 20.96484375, 258.6595764160156, 157.64622497558594, -195.69122314453125, 190.47792053222656, 87.44567108154297, -429.8540954589844, 452.3149108886719, 832.11279296875, 746.8707275390625, -140.07980346679688, 113.80171203613281, 888.4478759765625, -8.280656814575195, 324.515380859375, 157.9926300048828, 969.4363403320312, -222.09356689453125, 568.9141235351562, -250.54920959472656, 42.29418182373047, 175.98431396484375, 1047.426513671875, 134.97344970703125, -23.836822509765625, 171.68707275390625, 580.8845825195312, 230.50704956054688, -85.90602111816406, 95.7586669921875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000555.npy"}
|
|
{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 190.92840576171875, "std": 264.32025146484375, "min": -302.218994140625, "p10": -90.50440673828123, "median": 128.83272171020508, "p90": 622.1897216796876, "max": 1068.6500244140625, "pos_frac": 0.78125, "sample": [582.8800048828125, 33.27039337158203, 113.85275268554688, 103.60076904296875, 294.6944274902344, 104.08551788330078, 424.2005920410156, 303.0234069824219, -96.00175476074219, 336.11138916015625, 57.494712829589844, 396.7127990722656, 247.15438842773438, 32.087188720703125, 316.8440246582031, 123.09097290039062, 750.6259765625, 133.87039184570312, 398.7663879394531, -48.24976348876953, -302.218994140625, 40.56726837158203, -247.915283203125, 67.77672576904297, 1.4869384765625, 303.29229736328125, 187.68484497070312, -24.509475708007812, 479.48016357421875, -48.542724609375, 292.5815734863281, 51.45027160644531, 112.70429992675781, -97.33367156982422, 643.3226318359375, -33.951072692871094, 208.9599609375, 48.89833068847656, 56.339698791503906, 18.602304458618164, -108.9310302734375, 331.6814880371094, 200.58595275878906, 123.79505157470703, -204.0579833984375, 256.4581298828125, 715.029052734375, 58.97361755371094, 677.6879272460938, 35.950172424316406, 221.22268676757812, -118.53601837158203, 391.40380859375, 155.3287353515625, 1068.6500244140625, -31.80590057373047, 639.0367431640625, 740.1334838867188, 183.78591918945312, 207.35765075683594, 166.24195861816406, -77.67726135253906, -28.908950805664062, 249.22256469726562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000556.npy"}
|
|
{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 242.26992797851562, "std": 292.2218017578125, "min": -369.86962890625, "p10": -53.7164421081543, "median": 224.87705993652344, "p90": 622.9346496582033, "max": 976.716064453125, "pos_frac": 0.84375, "sample": [-365.57696533203125, 210.81260681152344, -369.86962890625, 90.30363464355469, 238.94151306152344, 275.55755615234375, 10.296981811523438, 411.9766845703125, 404.665771484375, 93.77479553222656, 490.05133056640625, 444.04803466796875, 482.0483703613281, 15.8623046875, 53.4060173034668, 262.7056884765625, 360.8868408203125, 243.8578338623047, 5.273441314697266, 25.07624053955078, 57.30342102050781, 918.6043701171875, 474.22332763671875, 507.0526123046875, 133.10983276367188, 462.8179016113281, 54.9322509765625, -54.98979187011719, -90.44835662841797, 729.7977294921875, 129.1807861328125, 252.06997680664062, 642.6312866210938, 265.9754638671875, 140.3064422607422, 55.87675857543945, 254.37416076660156, 65.01128387451172, 467.6103515625, 166.54244995117188, 284.9018859863281, -120.5903091430664, 257.53668212890625, 111.62232971191406, 133.45875549316406, -183.607666015625, 412.6548767089844, 838.2640380859375, -10.080291748046875, 958.317626953125, 66.77189636230469, 750.5625610351562, 976.716064453125, 252.09133911132812, 269.5784606933594, -14.298965454101562, 9.806697845458984, 551.9502563476562, -194.05116271972656, 576.975830078125, -50.74529266357422, 241.663818359375, 204.8702392578125, 164.82481384277344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000557.npy"}
|
|
{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 327.26336669921875, "std": 305.29400634765625, "min": -489.41058349609375, "p10": -31.493932342529263, "median": 300.361572265625, "p90": 715.1891967773438, "max": 940.5133666992188, "pos_frac": 0.890625, "sample": [263.0018310546875, 251.6493682861328, 299.8551025390625, 509.3638610839844, 138.6275634765625, 239.00270080566406, 397.49920654296875, 138.44020080566406, 159.298583984375, -96.22427368164062, -139.80380249023438, 563.4284057617188, 880.2149047851562, 709.6822509765625, 116.12821960449219, 389.6695861816406, 326.94366455078125, 316.1567077636719, 717.54931640625, 666.630126953125, 122.15084075927734, 244.175048828125, 208.00531005859375, 394.7237243652344, -45.374061584472656, 235.10830688476562, 678.3656005859375, 153.46754455566406, 617.9219970703125, 300.8680419921875, 101.12198638916016, 404.5809020996094, 567.6082153320312, 861.0598754882812, 762.18798828125, 485.6962890625, -431.9963684082031, 59.70071792602539, 343.5090637207031, 292.04730224609375, 216.59521484375, 169.87655639648438, 161.77394104003906, 317.31085205078125, 149.96168518066406, -69.80619812011719, -489.41058349609375, 940.5133666992188, 550.4550170898438, 3.5171966552734375, 548.8192138671875, 641.8546752929688, 478.20330810546875, 462.5898132324219, 817.0068359375, -231.85020446777344, 353.5334167480469, 173.177001953125, 137.42416381835938, 0.893035888671875, 282.8046875, 926.6534423828125, 602.9027099609375, 598.0140380859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000558.npy"}
|
|
{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 240.78573608398438, "std": 255.19012451171875, "min": -336.60693359375, "p10": -45.7383163452148, "median": 216.8832244873047, "p90": 561.9886962890625, "max": 906.4280395507812, "pos_frac": 0.890625, "sample": [479.8048400878906, 380.86505126953125, 248.0444793701172, 527.292236328125, 94.46900177001953, 40.60206604003906, 119.57557678222656, 104.25492858886719, 271.56524658203125, 906.4280395507812, 455.35260009765625, 335.14093017578125, 366.89697265625, 223.15878295898438, 61.788360595703125, 522.481689453125, 654.5232543945312, 556.4202270507812, -260.4024658203125, -195.258544921875, 496.5154113769531, 130.93321228027344, 42.4547119140625, 168.4446258544922, 221.7541046142578, 417.13800048828125, -65.79369354248047, 39.0506591796875, 420.279541015625, 145.676025390625, 82.81751251220703, 60.143863677978516, 61.73322296142578, 115.79969024658203, 66.49214172363281, 327.11798095703125, -135.8018798828125, 1.0575637817382812, 257.28668212890625, 205.49026489257812, 567.2532348632812, 200.3999481201172, 254.74105834960938, 65.2173843383789, -336.60693359375, 256.5328369140625, 125.71005249023438, 211.30722045898438, 613.6759643554688, 70.97339630126953, 325.0870361328125, 394.5641174316406, 251.84800720214844, 389.40185546875, 767.03271484375, 775.31640625, -159.7471466064453, 564.3751831054688, 362.6015319824219, 187.27691650390625, 212.01234436035156, 38.73081970214844, -223.02882385253906, 544.01904296875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000559.npy"}
|
|
{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 175.82272338867188, "std": 192.69825744628906, "min": -283.5401611328125, "p10": -72.45325698852537, "median": 166.03240966796875, "p90": 450.6260070800783, "max": 597.3692626953125, "pos_frac": 0.84375, "sample": [339.469970703125, 24.370994567871094, 122.0516586303711, 366.07818603515625, 47.872108459472656, 343.3885803222656, 76.50899505615234, -54.65265655517578, 175.621826171875, 221.10885620117188, 128.9405517578125, 96.70516967773438, 245.11337280273438, 341.82305908203125, 540.36572265625, 51.988868713378906, 593.8797607421875, 466.585205078125, 238.11297607421875, 413.38787841796875, 174.58596801757812, 130.3388671875, 170.85845947265625, 552.6741333007812, 197.7112579345703, -109.52904510498047, 161.20635986328125, 329.58349609375, 215.85496520996094, -93.71734619140625, 395.0899353027344, -42.94239807128906, 27.082962036132812, 227.7835235595703, 200.6116485595703, -283.5401611328125, 60.51178741455078, 597.3692626953125, 172.00930786132812, 467.117919921875, 149.87867736816406, -88.26583862304688, -56.19585418701172, 51.820709228515625, 347.22210693359375, 281.06390380859375, 94.50575256347656, 53.868934631347656, 8.644392013549805, 526.6115112304688, 263.3423767089844, 60.88604736328125, 155.39816284179688, 11.191324234008789, 172.4604949951172, 77.72672271728516, -79.42071533203125, 379.292724609375, 281.17742919921875, 302.93939208984375, -118.07080078125, 58.547691345214844, -121.29615020751953, 109.94366455078125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000560.npy"}
|
|
{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 190.6529083251953, "std": 241.4559326171875, "min": -558.3182373046875, "p10": -42.55577106475828, "median": 172.3116683959961, "p90": 491.659997558594, "max": 907.673095703125, "pos_frac": 0.828125, "sample": [374.7064208984375, 305.0404052734375, -88.31717681884766, -52.408363342285156, -290.9237060546875, 373.0331726074219, 285.2725524902344, -19.663986206054688, 329.7544860839844, 539.6180419921875, -18.74032974243164, 404.64892578125, 658.2080688476562, 173.8321075439453, 55.843849182128906, 125.59017944335938, 41.994686126708984, 30.795997619628906, 309.25567626953125, 301.78106689453125, 680.0872192382812, 34.60763931274414, 288.258544921875, 233.53738403320312, 92.15567016601562, 212.66258239746094, 560.21630859375, 127.7094497680664, 109.22069549560547, 609.3292236328125, 333.2983703613281, 907.673095703125, 4.3629302978515625, 239.21435546875, 348.351318359375, 1.3162155151367188, -14.331317901611328, 148.7061309814453, 20.195293426513672, 380.99212646484375, 120.86164093017578, -25.86121940612793, 0.45159149169921875, 424.60150146484375, 170.79122924804688, -192.9071044921875, 111.407958984375, 520.3993530273438, 406.028564453125, 334.8542785644531, -558.3182373046875, 344.17523193359375, 268.2713928222656, 60.82477569580078, 158.0225067138672, 21.6702880859375, 255.6611328125, 74.63325500488281, 258.95831298828125, 61.01263427734375, 175.0606689453125, -158.55538940429688, 262.5661315917969, -49.71057891845703], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000561.npy"}
|
|
{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 265.2984619140625, "std": 283.9421081542969, "min": -251.041015625, "p10": -34.76172008514402, "median": 216.09410095214844, "p90": 576.8138427734376, "max": 1216.8914794921875, "pos_frac": 0.859375, "sample": [424.2573547363281, 1212.5321044921875, 467.8479919433594, 239.32315063476562, 91.54763793945312, 365.5633850097656, 199.4888916015625, 126.54600524902344, 290.4036560058594, 393.06793212890625, 354.2023620605469, 709.3500366210938, 103.48519134521484, 369.2506103515625, 526.0181884765625, -67.39506530761719, 166.82440185546875, 272.0211181640625, 115.18055725097656, 332.1470031738281, 182.6083221435547, 293.75482177734375, 419.2830810546875, -71.57479095458984, 18.11064910888672, 511.93701171875, 592.7425537109375, -11.765274047851562, 751.9813842773438, 87.599853515625, -151.7529296875, -63.04901885986328, 2.4040966033935547, 403.7369079589844, -79.90296173095703, 1216.8914794921875, 98.31732177734375, 379.5604248046875, 345.1954650878906, 539.6468505859375, 203.42562866210938, 205.7019805908203, 439.3185729980469, 272.0107727050781, -251.041015625, 207.89891052246094, -43.76307678222656, 527.2813720703125, 379.43084716796875, 799.8319091796875, 34.828128814697266, 218.7122344970703, 362.2478942871094, 113.52589416503906, 254.41067504882812, 213.47596740722656, 12.763103485107422, 32.684898376464844, -13.758554458618164, 27.955501556396484, 112.75477600097656, 30.632431030273438, 670.8909301757812, 10.494483947753906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000562.npy"}
|
|
{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 187.10414123535156, "std": 274.08074951171875, "min": -481.5744934082031, "p10": -121.34629898071289, "median": 203.39397430419922, "p90": 515.3667449951173, "max": 844.5761108398438, "pos_frac": 0.734375, "sample": [148.13287353515625, -450.1454772949219, -438.08306884765625, 266.3356628417969, 249.3668212890625, -176.4796142578125, 531.0240478515625, 181.45565795898438, 299.0542297363281, 364.0035095214844, 264.265625, 238.76141357421875, -122.4854965209961, 478.8330383300781, 456.26361083984375, 369.46368408203125, 80.93635559082031, 844.5761108398438, 381.6593017578125, -61.39012145996094, 422.288818359375, -18.469749450683594, -118.68817138671875, -105.59425354003906, 197.5796661376953, -5.957439422607422, 368.19561767578125, 345.418212890625, 541.5067138671875, 106.24516296386719, 72.75474548339844, 65.8514633178711, -310.16064453125, 323.95697021484375, -90.98475646972656, 281.3134765625, -63.62203598022461, 113.8697738647461, 469.8548278808594, 332.19354248046875, 412.3814392089844, 454.94110107421875, 593.5345458984375, 149.303466796875, 433.6732482910156, 174.01251220703125, -481.5744934082031, 691.90087890625, -105.03510284423828, 555.5288696289062, -103.64804077148438, 543.6886596679688, 57.556129455566406, 99.85398864746094, 375.89959716796875, -215.40310668945312, 353.68243408203125, -48.38277816772461, 130.59620666503906, 209.20828247070312, 288.06182861328125, 127.099365234375, 260.9280700683594, 183.75779724121094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000563.npy"}
|
|
{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 236.80673217773438, "std": 200.45639038085938, "min": -250.6559295654297, "p10": -6.169475936889647, "median": 233.17378997802734, "p90": 499.30183410644537, "max": 672.2716674804688, "pos_frac": 0.859375, "sample": [233.1993865966797, 268.6010437011719, 555.6446533203125, -2.9454498291015625, 472.2637023925781, 504.42462158203125, 437.6301574707031, -30.322669982910156, 209.54476928710938, 79.09054565429688, 258.7359924316406, 265.1072998046875, 160.12164306640625, 284.2865905761719, 337.19659423828125, 27.48809814453125, 57.38047409057617, 334.0630187988281, 459.49462890625, 246.8266143798828, 144.77674865722656, 110.72322082519531, 315.0479736328125, -250.6559295654297, 95.82389831542969, 233.148193359375, -119.25863647460938, -13.090675354003906, 17.42315673828125, -157.02545166015625, 234.72549438476562, 649.79296875, 364.280517578125, -4.416130065917969, 184.503173828125, 307.61785888671875, 229.7231903076172, 190.53292846679688, 180.23681640625, -6.920909881591797, 460.70562744140625, 638.2537841796875, 57.7720947265625, 166.96099853515625, 417.32958984375, 270.5132141113281, 637.2587280273438, 295.1654052734375, 584.0374755859375, 340.79046630859375, 228.638916015625, 312.2894592285156, 47.432411193847656, 84.00468444824219, 167.24635314941406, 141.0496063232422, 253.18287658691406, 487.3486633300781, 322.7558898925781, 183.23252868652344, 672.2716674804688, -17.1602783203125, 336.26934814453125, 203.46112060546875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000564.npy"}
|
|
{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 244.01589965820312, "std": 317.0577087402344, "min": -317.9476013183594, "p10": -134.25024414062497, "median": 203.0470428466797, "p90": 692.3920349121095, "max": 1075.8699951171875, "pos_frac": 0.71875, "sample": [-144.96241760253906, 924.0082397460938, 742.40380859375, -317.9476013183594, 149.7471923828125, 390.18804931640625, 668.2088623046875, -34.34645080566406, -2.1764678955078125, -104.34007263183594, 352.8222351074219, 472.5032958984375, -83.3992919921875, 1.1143798828125, -29.854740142822266, -186.7799530029297, -103.6084213256836, 70.22637939453125, -61.7825927734375, 333.7015686035156, 398.23260498046875, 256.7428283691406, 321.68304443359375, 736.022705078125, -109.25517272949219, 127.18372344970703, 203.68048095703125, 202.41360473632812, -244.7305908203125, 539.7005004882812, 172.38809204101562, 249.58473205566406, 395.3346862792969, 441.12286376953125, 96.54667663574219, -1.112640380859375, 612.7031860351562, 340.4209899902344, -167.40695190429688, 77.44709014892578, -31.621620178222656, 323.51177978515625, 405.85699462890625, 960.5048828125, 273.8583984375, 1075.8699951171875, 274.51068115234375, 861.3336181640625, 456.3769836425781, -153.53591918945312, 182.85906982421875, 18.806533813476562, 497.3780517578125, 356.6484375, 697.63916015625, 288.0093688964844, 500.96246337890625, -7.381999969482422, 141.78643798828125, 77.3075942993164, 104.26690673828125, 130.76959228515625, -183.2763214111328, 680.1487426757812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000565.npy"}
|
|
{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 250.18751525878906, "std": 326.70904541015625, "min": -441.92724609375, "p10": -151.1496322631836, "median": 297.29283142089844, "p90": 690.592938232422, "max": 947.1311645507812, "pos_frac": 0.6875, "sample": [-49.40423583984375, -176.67578125, 449.7316589355469, 695.2286376953125, -81.63485717773438, -132.76351928710938, 532.2781982421875, 210.122802734375, -276.59527587890625, 406.54595947265625, 349.9006042480469, -116.82084655761719, -10.490966796875, 195.39559936523438, 378.29638671875, 293.9338684082031, 436.646484375, 105.31477355957031, 15.05527114868164, -441.92724609375, 502.4373474121094, 494.8408203125, 300.65179443359375, -202.04681396484375, 384.9855041503906, -79.74346160888672, 250.6022186279297, 594.0921630859375, 41.05888366699219, 715.9517822265625, 519.1212768554688, 916.1273193359375, 533.3948974609375, 63.79408264160156, 729.9632568359375, -44.478294372558594, -55.52485656738281, 947.1311645507812, 381.2267761230469, -151.17860412597656, -39.438167572021484, 380.48284912109375, 79.5108871459961, 664.5606079101562, 469.70867919921875, 707.7954711914062, 734.7152099609375, 451.2842102050781, -151.08203125, -86.8049087524414, -175.26303100585938, 679.7763061523438, 195.60546875, 643.766357421875, 76.81134796142578, -206.4019775390625, 461.0327453613281, 444.87274169921875, 494.88104248046875, -141.96432495117188, -34.73218536376953, 37.09130859375, 318.97998046875, 382.2678527832031], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000566.npy"}
|
|
{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 230.08824157714844, "std": 250.697021484375, "min": -431.220703125, "p10": -33.46759185791014, "median": 197.03998565673828, "p90": 588.4609802246094, "max": 767.8245849609375, "pos_frac": 0.859375, "sample": [-21.81244659423828, 157.49156188964844, 676.3663940429688, 464.5951843261719, 343.1968994140625, 388.725341796875, 58.07965850830078, 105.1996078491211, 679.9808959960938, 269.2774963378906, 529.2809448242188, -62.46672821044922, 686.2722778320312, 295.88134765625, 134.96035766601562, 197.61273193359375, 583.9077758789062, 441.19293212890625, 408.1075744628906, 58.54094314575195, 337.46728515625, 196.4672393798828, 38.60932922363281, 270.47698974609375, 187.633544921875, 309.79931640625, 244.7164764404297, 624.2630004882812, 18.50782585144043, 371.7674865722656, -38.46265411376953, 506.14398193359375, 224.8379669189453, -431.220703125, 183.2242431640625, 54.4664421081543, 62.919315338134766, 394.2701110839844, 767.8245849609375, 642.9697875976562, 63.889801025390625, -285.8372802734375, 107.20440673828125, -8.759727478027344, 40.10993957519531, 574.7954711914062, 224.4788818359375, 98.62042999267578, 49.49285125732422, 150.7049102783203, -68.21363830566406, 590.412353515625, 13.627128601074219, 440.25616455078125, -133.00840759277344, 192.20785522460938, 58.53412628173828, 370.61029052734375, 21.64891815185547, 110.34927368164062, 203.81663513183594, 306.5664978027344, -205.40599060058594, 448.4747314453125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000567.npy"}
|
|
{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 268.58636474609375, "std": 258.15460205078125, "min": -491.634033203125, "p10": -62.20263175964354, "median": 241.02171325683594, "p90": 607.1706420898439, "max": 1045.9205322265625, "pos_frac": 0.875, "sample": [289.0341796875, -94.65022277832031, 636.3839111328125, 323.8945617675781, 378.47998046875, 259.2216491699219, 348.5992736816406, 381.0885009765625, 434.5967712402344, -491.634033203125, 16.963272094726562, -138.46510314941406, 432.9010925292969, 693.6002197265625, 95.79163360595703, 212.1486053466797, 180.22735595703125, 47.273136138916016, 174.3064422607422, 86.28453063964844, 223.09701538085938, 225.26300048828125, 698.482177734375, 502.4341125488281, 239.09933471679688, 217.15333557128906, 226.88937377929688, 1045.9205322265625, 132.47201538085938, 30.704486846923828, 389.56005859375, 372.9168701171875, 338.1805419921875, 407.54168701171875, 451.01385498046875, 368.8681335449219, 101.66575622558594, 291.3355712890625, 219.3112335205078, 494.4200439453125, 809.1240844726562, -47.84286117553711, 215.2904815673828, 539.26708984375, 318.7569580078125, 161.57708740234375, -115.52590942382812, -74.94471740722656, 164.96173095703125, 655.665283203125, 87.4822006225586, 628.142333984375, 257.85791015625, -223.13442993164062, 183.32992553710938, 207.30404663085938, 355.7911682128906, 363.99163818359375, 242.944091796875, 540.5772094726562, 172.7196044921875, 558.2366943359375, -68.35681915283203, 13.938676834106445], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000568.npy"}
|
|
{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 197.3388214111328, "std": 286.76068115234375, "min": -629.8634643554688, "p10": -85.09010162353513, "median": 163.97257232666016, "p90": 550.4490966796875, "max": 1060.5008544921875, "pos_frac": 0.78125, "sample": [-345.7056579589844, 267.6266174316406, 1060.5008544921875, 158.97000122070312, 530.6414184570312, -61.34507751464844, 249.42306518554688, -3.187896728515625, -238.01229858398438, 80.57460021972656, 521.9180297851562, 698.017822265625, 168.317626953125, 433.7493896484375, 336.6001281738281, -57.728790283203125, 133.77146911621094, 544.034423828125, -18.39339828491211, -30.12566566467285, -629.8634643554688, 537.4096069335938, 63.49404525756836, -44.323062896728516, 50.309112548828125, 473.90570068359375, 554.2625732421875, 35.76554870605469, 56.889068603515625, 89.21844482421875, 410.2900390625, 511.4468688964844, 395.33306884765625, 22.81280517578125, 557.5723876953125, 332.4277648925781, 56.39410400390625, 181.0773162841797, 467.36871337890625, -238.89892578125, 191.22299194335938, 33.7851676940918, -95.26654052734375, 553.1982421875, 189.1255340576172, 111.46695709228516, 310.7015075683594, 221.84579467773438, -157.808837890625, 76.28630065917969, -33.02601623535156, 612.7296752929688, 74.81148529052734, 161.09234619140625, 150.0156707763672, -316.90179443359375, 134.06674194335938, 166.85279846191406, 121.31794738769531, 236.5650634765625, 178.49838256835938, 233.20260620117188, 424.089599609375, 739.275634765625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000569.npy"}
|
|
{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 242.13690185546875, "std": 247.8187255859375, "min": -223.18594360351562, "p10": -69.33032608032222, "median": 209.4373779296875, "p90": 630.1650024414063, "max": 786.071044921875, "pos_frac": 0.84375, "sample": [104.05259704589844, 681.997314453125, 270.7674865722656, 650.9155883789062, 729.3671875, -7.609020233154297, 388.6753845214844, 573.6046752929688, 637.973388671875, 296.96380615234375, 182.57015991210938, 501.167724609375, 208.24624633789062, 354.98712158203125, 31.39742088317871, 178.93263244628906, 730.5972290039062, 305.9890441894531, 426.50701904296875, -124.72697448730469, 256.7281188964844, -118.9001693725586, -29.272401809692383, -12.698478698730469, 84.36310577392578, 635.513916015625, 11.235393524169922, 87.47473907470703, 197.47039794921875, 285.998046875, 210.62850952148438, 270.31878662109375, -86.49800872802734, 3.149415969848633, 166.4708251953125, 394.35687255859375, 146.2799072265625, 363.64892578125, 12.283073425292969, 617.6842041015625, 151.22955322265625, -105.42166900634766, 34.923675537109375, 249.901611328125, 459.53558349609375, 281.0496826171875, 121.13395690917969, 458.5131530761719, 483.3215637207031, 521.078857421875, 44.81034469604492, 134.76742553710938, 567.6738891601562, 306.3817138671875, 159.48333740234375, -223.18594360351562, 16.74010467529297, 244.59400939941406, 211.34994506835938, -145.60621643066406, -139.22503662109375, 786.071044921875, 145.33419799804688, 113.69541931152344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000570.npy"}
|
|
{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 258.0314636230469, "std": 315.23236083984375, "min": -487.07098388671875, "p10": -159.4481033325195, "median": 213.77965545654297, "p90": 597.8791931152344, "max": 1078.7962646484375, "pos_frac": 0.828125, "sample": [563.5963134765625, 467.74847412109375, 40.13569259643555, 389.4783020019531, 60.845558166503906, 141.09945678710938, 51.93900680541992, 499.1771545410156, 312.50701904296875, 548.1107177734375, 1078.7962646484375, 72.43846130371094, 109.96533966064453, 541.732177734375, 131.36314392089844, 143.8599395751953, 442.5985107421875, 541.2568359375, 206.27688598632812, 551.391357421875, 849.3878784179688, 814.0423583984375, 622.1300048828125, 255.41973876953125, -249.04678344726562, -48.6527099609375, 407.6986083984375, 171.79393005371094, 400.4201354980469, 549.5636596679688, 113.7491683959961, 3.66326904296875, 356.7104797363281, 870.7898559570312, 583.400390625, 168.31312561035156, -178.821533203125, 105.86819458007812, 599.9113159179688, 462.1422119140625, 374.8767395019531, -140.67323303222656, 627.953369140625, 221.2824249267578, -15.965030670166016, 190.73037719726562, 593.1375732421875, 193.1885528564453, 330.4090270996094, 115.03929138183594, -188.87171936035156, -463.264404296875, 188.20620727539062, 203.2047119140625, -487.07098388671875, 323.25079345703125, 124.66302490234375, 419.43255615234375, -357.4510498046875, 307.7868957519531, 4.81561279296875, 444.77081298828125, -167.49447631835938, -80.74462890625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000571.npy"}
|
|
{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 235.7051544189453, "std": 340.7324523925781, "min": -446.3598327636719, "p10": -111.3464057922363, "median": 166.85531616210938, "p90": 736.8723510742191, "max": 1212.5777587890625, "pos_frac": 0.765625, "sample": [-172.66671752929688, -353.71624755859375, -40.0306396484375, 408.78314208984375, 854.3984375, 178.79547119140625, 24.661346435546875, 90.4494400024414, 42.16427993774414, -82.99674224853516, 103.7967758178711, 263.7430114746094, 535.4508056640625, 163.01588439941406, 432.00042724609375, 415.7743835449219, 598.5281982421875, 659.6177978515625, 393.6812438964844, 346.8218688964844, 99.67515563964844, 106.07472229003906, 114.74568176269531, 457.3706359863281, -11.697792053222656, 4.543966293334961, 69.02447509765625, -123.49626159667969, -192.42636108398438, 496.96875, 253.85516357421875, 2.9471664428710938, 263.2079772949219, 183.36972045898438, 103.28378295898438, 1212.5777587890625, 119.19219207763672, 222.68087768554688, -60.231590270996094, -5.9300384521484375, 560.3690795898438, 10.13296127319336, -188.73794555664062, 411.4242858886719, -44.483028411865234, -446.3598327636719, 1121.15966796875, 238.46360778808594, 229.8395233154297, -8.273056030273438, 607.55029296875, 128.40997314453125, 326.4148864746094, 914.9993286132812, 37.187198638916016, 229.77743530273438, 438.797607421875, 789.7617797851562, -14.28167724609375, -317.2461242675781, 170.6947479248047, 769.9814453125, 885.9395141601562, 55.60108947753906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000572.npy"}
|
|
{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 231.14462280273438, "std": 264.0170593261719, "min": -401.99267578125, "p10": -33.525736999511714, "median": 219.67201232910156, "p90": 581.0688110351565, "max": 997.763916015625, "pos_frac": 0.84375, "sample": [244.9833984375, -34.52751159667969, 87.09442901611328, 70.32025146484375, 19.552108764648438, 808.31591796875, 276.6152038574219, 9.258247375488281, 283.906494140625, 297.96490478515625, 302.9603576660156, 181.6742706298828, -48.8770751953125, 997.763916015625, 231.8126220703125, 298.75531005859375, 827.415771484375, -401.99267578125, 90.68183135986328, -21.08489227294922, -260.5215759277344, 714.3058471679688, 131.14434814453125, 246.80133056640625, 36.87720489501953, 205.58544921875, 512.6470336914062, 603.0361938476562, 203.118896484375, 334.85565185546875, -249.9869842529297, 182.7766571044922, 169.4069061279297, 104.69209289550781, 684.0401611328125, -211.33880615234375, 318.1302490234375, -98.41015625, 245.826416015625, -25.766990661621094, 722.3828125, 4.504829406738281, 314.5245361328125, 38.303855895996094, 212.87548828125, 50.72911071777344, 297.493408203125, 529.8115844726562, 454.33056640625, -31.188262939453125, 276.28692626953125, 137.42446899414062, 219.73309326171875, 332.92608642578125, 234.26548767089844, 146.68954467773438, 219.61093139648438, 343.2102966308594, 417.4557800292969, 196.37193298339844, 244.7071533203125, 435.2084655761719, 519.302001953125, 106.48316955566406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000573.npy"}
|
|
{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 268.8031005859375, "std": 281.9962158203125, "min": -398.32965087890625, "p10": -44.553089904785146, "median": 231.7279281616211, "p90": 662.7252380371094, "max": 958.1531982421875, "pos_frac": 0.84375, "sample": [20.20081329345703, 235.9933319091797, -49.088470458984375, 227.4625244140625, 480.0899658203125, 100.08113861083984, -26.539400100708008, 958.1531982421875, 646.7232055664062, 443.01019287109375, 51.51287078857422, 721.7802734375, 629.3463134765625, 522.117431640625, 139.93423461914062, 278.44952392578125, 39.63274383544922, 292.56268310546875, -353.7968444824219, 840.8421020507812, 410.8656311035156, 406.31109619140625, 80.26631164550781, 467.52374267578125, 225.55654907226562, 512.0671997070312, 193.72979736328125, -147.60504150390625, -71.43550109863281, 2.8101463317871094, 126.25082397460938, -111.97291564941406, 300.4405517578125, 349.2278747558594, 107.59960174560547, 516.6392822265625, -148.84542846679688, 693.401611328125, 176.4821319580078, 434.0543518066406, 625.2373657226562, -33.97053527832031, 593.3031005859375, 350.66082763671875, 41.766265869140625, 158.90231323242188, 306.4487609863281, 167.7079315185547, -398.32965087890625, -4.849212646484375, 243.74513244628906, 109.15140533447266, 36.69520568847656, 705.1941528320312, 378.6958312988281, 432.5343322753906, 222.53036499023438, 669.583251953125, 214.29550170898438, 307.31707763671875, 184.92396545410156, 782.0657348632812, 258.1872863769531, 129.7665557861328], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000574.npy"}
|
|
{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 226.16366577148438, "std": 336.1237487792969, "min": -635.6612548828125, "p10": -219.27518920898427, "median": 244.20915985107422, "p90": 669.4330017089844, "max": 881.3919067382812, "pos_frac": 0.78125, "sample": [548.7539672851562, 248.7284393310547, 369.6072692871094, 389.95703125, 349.4216003417969, -256.2596740722656, 270.4649658203125, 265.56927490234375, 266.8243408203125, 103.74662780761719, 148.9550323486328, 31.653526306152344, -132.97805786132812, 461.04730224609375, 305.3135986328125, 37.95861053466797, -61.63555908203125, 672.995849609375, 59.74470138549805, 248.8689727783203, 306.2031555175781, 238.3106689453125, 661.1196899414062, 94.56242370605469, 112.63323211669922, 36.627647399902344, -635.6612548828125, 239.68988037109375, -309.6982421875, 366.07861328125, -99.6119613647461, 838.5010375976562, 381.9306335449219, 772.59326171875, 48.08600616455078, -29.426071166992188, 62.944068908691406, -37.82164764404297, 722.5569458007812, 547.7174072265625, -12.372314453125, -75.40166473388672, -294.5167541503906, 656.466064453125, 772.1640625, -484.25592041015625, 437.97332763671875, 383.3777160644531, 453.5406494140625, 863.5146484375, 140.91348266601562, 562.2733154296875, 429.2985534667969, 40.83953857421875, 508.9853515625, 167.00408935546875, 358.9158020019531, -440.18572998046875, 881.3919067382812, 15.311046600341797, -352.38433837890625, 151.05764770507812, 159.6111297607422, 504.8794250488281], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000575.npy"}
|
|
{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 214.5222625732422, "std": 293.86767578125, "min": -512.2797241210938, "p10": -159.41121826171874, "median": 212.4516143798828, "p90": 619.6474731445312, "max": 792.9039306640625, "pos_frac": 0.765625, "sample": [329.06146240234375, 748.540283203125, -165.72999572753906, 37.185367584228516, -193.17250061035156, 84.36405944824219, -50.26072692871094, 650.4005737304688, 404.1839904785156, 110.96155548095703, 214.30044555664062, 619.729248046875, 147.64657592773438, 154.3573760986328, 388.02899169921875, 293.26055908203125, 524.502685546875, -366.29425048828125, 383.3656311035156, 314.14044189453125, 392.6023254394531, 352.236572265625, 535.97314453125, 640.185791015625, 41.7190055847168, 792.9039306640625, -144.6674041748047, -196.71031188964844, 302.5039367675781, 439.48602294921875, -119.15098571777344, 204.55958557128906, 147.36184692382812, 27.408287048339844, 383.0103759765625, 619.4566650390625, 766.0260620117188, 210.602783203125, -31.1120662689209, 435.90814208984375, 50.48292541503906, 540.2192993164062, 266.50531005859375, 264.206298828125, 23.756248474121094, -512.2797241210938, -30.389450073242188, 293.76904296875, -274.96142578125, 245.1159210205078, 270.9141540527344, 749.8638916015625, 560.0282592773438, 496.4619140625, -3.364307403564453, 33.85527038574219, 44.72284698486328, 126.71796417236328, 91.36466217041016, 121.1007080078125, -310.6382751464844, -66.4097900390625, -27.0540771484375, 346.5619201660156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000576.npy"}
|
|
{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 233.4116668701172, "std": 248.28775024414062, "min": -530.3923950195312, "p10": -19.075731658935542, "median": 214.25200653076172, "p90": 562.5327270507813, "max": 923.908447265625, "pos_frac": 0.875, "sample": [-530.3923950195312, 923.908447265625, 427.733154296875, 384.60675048828125, 64.83258819580078, 79.29209899902344, 269.8919372558594, 192.6888885498047, 115.59331512451172, -12.892791748046875, 565.373046875, 192.76461791992188, 307.6582946777344, 358.5203552246094, 142.56504821777344, 564.6746215820312, 518.19189453125, 360.92657470703125, 4.9633331298828125, 409.31378173828125, 95.35426330566406, 229.99081420898438, -217.13485717773438, 173.289794921875, 283.4976806640625, 339.1533508300781, 128.72177124023438, 390.73583984375, 281.38555908203125, 369.89990234375, 319.0359191894531, 176.42330932617188, 159.54318237304688, 239.90460205078125, 557.5349731445312, 50.730072021484375, 314.2979736328125, -49.25527572631836, 147.1848907470703, -385.427734375, 48.256744384765625, 156.82684326171875, 357.44976806640625, 277.97021484375, -21.725563049316406, 148.11524963378906, 89.85774230957031, 576.0504760742188, 354.30999755859375, 644.994140625, 123.22035217285156, 114.11370086669922, 632.1961669921875, 93.50721740722656, 717.9781494140625, 349.9040222167969, 347.0179138183594, -63.94793701171875, -241.8265838623047, 440.11199951171875, 151.99412536621094, 411.23590087890625, 87.1437759399414, 198.51319885253906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000577.npy"}
|
|
{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 241.57240295410156, "std": 231.2786407470703, "min": -260.4533996582031, "p10": -46.757933044433585, "median": 222.58992767333984, "p90": 558.3298645019532, "max": 829.4097900390625, "pos_frac": 0.84375, "sample": [829.4097900390625, 117.3360595703125, 384.6582336425781, 188.15673828125, 299.3892517089844, 249.81509399414062, 613.0438232421875, 376.8087463378906, 102.99911499023438, 23.220775604248047, 109.10621643066406, 674.8992309570312, 237.92771911621094, 237.809326171875, 92.84844970703125, -260.4533996582031, 303.84906005859375, 39.49687957763672, 569.11962890625, 443.123779296875, 497.45928955078125, 361.99505615234375, 433.3791198730469, -73.53306579589844, -132.68331909179688, 264.6209411621094, 373.62738037109375, 50.57659149169922, -23.635021209716797, 129.70993041992188, -1.6715850830078125, 157.46533203125, 272.0068359375, 152.61984252929688, 36.36213684082031, 454.3292236328125, 61.93132781982422, 61.58208465576172, -217.67050170898438, 96.96024322509766, 145.9485321044922, 348.9399108886719, 184.47775268554688, 209.3527069091797, 474.99591064453125, 344.4303894042969, 90.19475555419922, -63.90983581542969, 507.5250244140625, 177.8422393798828, 212.69442749023438, 580.7777709960938, 212.84524536132812, 438.7676696777344, 400.5460205078125, 533.1537475585938, -35.915313720703125, 467.33489990234375, 605.3598022460938, 606.4681396484375, 232.33460998535156, -134.3201904296875, -51.40476989746094, 384.19842529296875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000578.npy"}
|
|
{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 201.4764404296875, "std": 250.33419799804688, "min": -433.51849365234375, "p10": -147.71248779296866, "median": 194.68404388427734, "p90": 471.9400115966797, "max": 921.1918334960938, "pos_frac": 0.84375, "sample": [0.7475204467773438, -50.566139221191406, -260.57275390625, 402.5220031738281, -29.66448974609375, 152.71221923828125, 457.6134338378906, 180.790283203125, 352.8516540527344, 407.63250732421875, 41.96508026123047, 79.55152893066406, -433.51849365234375, 264.96240234375, 243.32269287109375, 410.3075256347656, 246.42515563964844, 342.1484375, 762.9415893554688, 36.65592956542969, 82.95331573486328, 88.46036529541016, 169.68234252929688, 317.4691162109375, 196.16551208496094, 221.29351806640625, 328.7000732421875, 92.30542755126953, 295.0050048828125, 248.68809509277344, 231.72869873046875, 89.83824157714844, 472.3675537109375, 470.9424133300781, 921.1918334960938, 582.611328125, 298.470947265625, -240.4185333251953, -24.240192413330078, 395.5467224121094, 642.65380859375, 126.75094604492188, 412.1456604003906, -210.24285888671875, 682.2772827148438, 65.70849609375, 109.17350769042969, 15.512985229492188, -243.8435516357422, 195.31932067871094, 507.2933044433594, 192.06698608398438, 159.7605743408203, 285.4487609863281, 319.2223815917969, -193.16647338867188, 204.10348510742188, 166.53106689453125, 75.34255981445312, -189.3466339111328, 13.516250610351562, 365.49395751953125, 151.13168334960938, 194.04876708984375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000579.npy"}
|
|
{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 278.8363342285156, "std": 299.1099548339844, "min": -586.0692749023438, "p10": -30.280702972412087, "median": 246.4615707397461, "p90": 648.6338439941406, "max": 1153.0257568359375, "pos_frac": 0.875, "sample": [88.04182434082031, 650.552734375, 61.71342468261719, 236.45152282714844, -45.371578216552734, 190.20330810546875, 191.11256408691406, 829.2659912109375, 255.6670379638672, 249.3903045654297, 385.11151123046875, 120.303466796875, 1081.7705078125, 160.20742797851562, 53.21232986450195, 486.7322998046875, 265.07550048828125, 31.746074676513672, 434.79412841796875, 345.14453125, 245.0061798095703, 430.3105773925781, 565.9181518554688, 262.6992492675781, 305.4112854003906, 123.66577911376953, 376.1804504394531, 395.27862548828125, 366.3836669921875, 104.67951965332031, 199.250244140625, 181.6012420654297, 162.27120971679688, 70.87644958496094, 564.9415283203125, 572.7528076171875, 78.46902465820312, -394.2775573730469, 644.1564331054688, 428.6900329589844, 710.68505859375, 100.16197204589844, -586.0692749023438, 385.0601806640625, 54.75609588623047, 324.39495849609375, -89.60415649414062, -44.54298400878906, 478.99493408203125, 1153.0257568359375, 210.56004333496094, -10.011566162109375, 863.9371948242188, 164.06857299804688, 265.4251708984375, 171.9107666015625, 19.98845672607422, 242.61349487304688, -38.96747589111328, -88.20695495605469, 256.264404296875, 704.5479736328125, 593.225341796875, 247.91696166992188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000580.npy"}
|
|
{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 196.97613525390625, "std": 254.11041259765625, "min": -324.4126281738281, "p10": -98.54985504150385, "median": 153.70510864257812, "p90": 487.26572875976564, "max": 824.1366577148438, "pos_frac": 0.78125, "sample": [347.5147399902344, 71.47541809082031, 86.3040771484375, -41.45780944824219, 257.7917785644531, 153.0075225830078, -271.90179443359375, 243.3101806640625, 28.905235290527344, -41.482181549072266, 110.37646484375, 420.29376220703125, 345.61181640625, 147.706298828125, 422.2818603515625, -324.4126281738281, 824.1366577148438, 351.91827392578125, 510.16400146484375, 800.8961181640625, -123.00743103027344, 426.3416748046875, -2.953824996948242, 302.62530517578125, 196.0032958984375, 420.2673034667969, 154.40269470214844, 46.15027618408203, 235.802001953125, 434.2170104980469, -161.18443298339844, 713.5972900390625, 32.16714096069336, 481.2393798828125, 200.48858642578125, 226.58404541015625, 527.5328979492188, 133.2077178955078, 86.0287857055664, 364.62579345703125, 150.81094360351562, 62.608367919921875, 32.60052490234375, 147.8705596923828, 274.73797607421875, 94.38914489746094, 489.84844970703125, 439.73828125, 770.098388671875, 210.40553283691406, -251.8544921875, -293.1674499511719, 83.76138305664062, -219.53146362304688, 312.63946533203125, -27.322006225585938, -21.826839447021484, -15.706436157226562, 141.67034912109375, -0.08127593994140625, 292.6510925292969, 332.9436950683594, 98.29529571533203, 364.3180847167969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000581.npy"}
|
|
{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 264.7296142578125, "std": 292.3086242675781, "min": -619.9075317382812, "p10": -30.136843109130854, "median": 291.66539001464844, "p90": 675.1163574218751, "max": 792.1494140625, "pos_frac": 0.875, "sample": [-619.9075317382812, 166.3709716796875, 771.3803100585938, 398.10833740234375, 583.0548095703125, -25.89178466796875, 287.5093688964844, 364.489013671875, 315.275146484375, 36.75222396850586, 302.8661193847656, 178.9739990234375, 1.7750396728515625, 521.5897827148438, 336.3545837402344, 457.6321716308594, 36.55464172363281, 12.033340454101562, 199.3109588623047, 685.53125, 334.45123291015625, 29.830337524414062, 406.9443359375, -235.5728759765625, 116.48680877685547, 778.20068359375, 62.082130432128906, 343.6391906738281, 792.1494140625, 716.0689697265625, 237.7891845703125, 295.8214111328125, 557.0582275390625, 3.5778427124023438, -257.7174072265625, 104.83208465576172, 33.072669982910156, -50.05696105957031, 328.55377197265625, 466.83270263671875, 93.87806701660156, -83.41206359863281, 643.7261352539062, 715.400634765625, 328.6937255859375, 736.47216796875, 33.445579528808594, 179.4058074951172, 9.673477172851562, 158.1647491455078, 63.74678421020508, 650.81494140625, 581.417236328125, 334.057373046875, -31.956153869628906, -301.7452087402344, 313.4934387207031, 112.39469146728516, 491.7802734375, 469.4599304199219, 626.9700927734375, 91.85023498535156, 421.67987060546875, 229.47706604003906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000582.npy"}
|
|
{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 188.09481811523438, "std": 293.0146789550781, "min": -297.57073974609375, "p10": -231.92861022949216, "median": 148.6100616455078, "p90": 522.0634826660156, "max": 1061.1964111328125, "pos_frac": 0.796875, "sample": [191.07766723632812, 173.85406494140625, 243.33151245117188, 266.47393798828125, 234.42922973632812, 141.9700469970703, 601.43896484375, 228.2566680908203, 1061.1964111328125, 124.34044647216797, 25.88458251953125, 41.17051696777344, -264.58636474609375, 146.00608825683594, 143.49850463867188, -197.45025634765625, 513.8640747070312, 447.9264831542969, 251.38919067382812, 428.9326171875, 339.3594055175781, 60.183250427246094, 22.22134017944336, -276.19244384765625, 99.20549774169922, -273.7385559082031, 261.2811279296875, 38.6390380859375, -262.8780822753906, -151.61175537109375, 53.51130294799805, 27.793853759765625, 240.9058837890625, 460.6455078125, 357.5198974609375, 981.1744995117188, 235.10382080078125, 9.365928649902344, 525.5775146484375, 79.86347198486328, -7.626708984375, 151.2140350341797, 248.845458984375, -71.10762786865234, -145.61761474609375, -297.57073974609375, 810.6017456054688, 327.2684326171875, -279.3382263183594, 242.4644012451172, 440.09588623046875, 668.9112548828125, -0.48760223388671875, 336.11956787109375, 421.81475830078125, -246.70504760742188, 44.02936935424805, 435.0381774902344, 82.41754150390625, 222.57278442382812, 744.5410766601562, 87.44842529296875, 80.94550323486328, 111.25814819335938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000583.npy"}
|
|
{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 298.12957763671875, "std": 245.5133819580078, "min": -150.76194763183594, "p10": 52.58110084533692, "median": 267.55247497558594, "p90": 720.8184509277345, "max": 868.063232421875, "pos_frac": 0.953125, "sample": [355.32806396484375, 206.4915313720703, 50.57438278198242, 305.6951599121094, 62.270751953125, -150.76194763183594, 436.9883117675781, 400.23773193359375, 333.0185546875, 67.90879821777344, 60.51019287109375, 130.59259033203125, 77.93576049804688, 218.2685546875, 339.8926696777344, 551.7374877929688, 286.31427001953125, 744.82763671875, 585.3453979492188, 34.330467224121094, 284.0221252441406, 161.83184814453125, 21.356979370117188, 193.92698669433594, 205.6285400390625, 58.463775634765625, 702.9537963867188, 127.28153228759766, 137.74594116210938, 220.6396484375, 325.97052001953125, 308.55987548828125, 59.3529052734375, 72.29183959960938, 421.8198547363281, 317.41412353515625, 195.7043914794922, 226.43959045410156, 58.554107666015625, 369.9342956542969, 366.55694580078125, 16.21652603149414, 275.9783935546875, 778.7506713867188, 259.1265563964844, 178.1702880859375, 464.1817626953125, 868.063232421875, -149.19207763671875, 728.4747314453125, 645.2890625, 57.26344299316406, 858.7721557617188, 808.2213745117188, 404.8114013671875, 512.3487548828125, 197.00999450683594, 539.53564453125, -11.342063903808594, 239.63864135742188, 297.1944274902344, 326.5379638671875, 746.6184692382812, 104.66806030273438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000584.npy"}
|
|
{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 225.47747802734375, "std": 279.8952331542969, "min": -455.5902404785156, "p10": -100.32156677246093, "median": 221.31221771240234, "p90": 536.9313110351563, "max": 1024.2122802734375, "pos_frac": 0.734375, "sample": [159.59820556640625, -66.75828552246094, 472.9668884277344, 240.5753936767578, -209.29029846191406, 1024.2122802734375, 405.11077880859375, -105.71734619140625, 184.53109741210938, 236.32513427734375, 665.2700805664062, 45.51335144042969, 185.40762329101562, 321.9332275390625, 140.78277587890625, 493.98760986328125, 470.8443908691406, 455.6188659667969, 894.6866455078125, 238.3028564453125, 279.31512451171875, 545.1904296875, 114.42333221435547, 286.4676208496094, 378.4463195800781, -12.732345581054688, 249.8733673095703, 256.40045166015625, 477.9181213378906, 111.21316528320312, 20.439346313476562, 382.577880859375, 222.630859375, -120.35964965820312, 600.8861083984375, 120.4664535522461, 575.2696533203125, -2.9818763732910156, -11.922866821289062, 456.50506591796875, 160.67941284179688, -191.00042724609375, 472.7001037597656, -41.978248596191406, 402.15301513671875, 177.2224578857422, 346.44622802734375, -52.412384033203125, 415.6407470703125, -11.025350570678711, -87.73141479492188, -455.5902404785156, -48.10445022583008, -291.6678161621094, -232.52964782714844, 697.501708984375, 409.91522216796875, 79.76670837402344, 428.02960205078125, -22.01318359375, 219.9935760498047, 167.81927490234375, 185.1558380126953, 517.6600341796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000585.npy"}
|
|
{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 199.46292114257812, "std": 272.1425476074219, "min": -395.1737060546875, "p10": -103.09475708007811, "median": 158.7035903930664, "p90": 528.1288757324219, "max": 893.8165283203125, "pos_frac": 0.78125, "sample": [767.3692016601562, 119.1375503540039, 3.1278724670410156, 55.41230010986328, 281.2046813964844, 246.50698852539062, 5.365379333496094, 244.53904724121094, 55.53899383544922, 434.0311584472656, -19.304973602294922, 162.68185424804688, 325.4705810546875, 624.7792358398438, -111.50167083740234, 36.78593063354492, 474.1028137207031, -395.1737060546875, 61.91968536376953, 157.23350524902344, 86.10428619384766, -48.783424377441406, 55.875999450683594, 502.9342041015625, 584.7693481445312, -94.70152282714844, 388.77294921875, -189.29202270507812, 215.83319091796875, 23.315948486328125, 425.29534912109375, 23.22919464111328, 519.7302856445312, -154.48516845703125, 296.9467468261719, 155.45437622070312, 452.7152099609375, -23.89832305908203, 145.09893798828125, -21.948694229125977, 614.7979125976562, -95.32603454589844, 531.728271484375, 374.1381530761719, -384.5579528808594, -244.3808135986328, 160.17367553710938, 416.5174560546875, 328.9866027832031, 51.975975036621094, -106.42420959472656, 25.466331481933594, 438.09429931640625, 893.8165283203125, 519.2064208984375, 210.14430236816406, 434.41180419921875, 733.2518310546875, 120.67974090576172, -76.94806671142578, 221.4102783203125, 435.7132568359375, 194.8536834716797, 95.70370483398438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000586.npy"}
|
|
{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 230.1880340576172, "std": 269.2070617675781, "min": -539.1246337890625, "p10": -82.84138870239258, "median": 207.2339859008789, "p90": 577.5026306152345, "max": 864.18994140625, "pos_frac": 0.84375, "sample": [182.0252227783203, 508.33349609375, 323.7984619140625, 14.983671188354492, 380.9953308105469, 45.669715881347656, 345.37677001953125, -38.1511344909668, -2.9747657775878906, 479.28692626953125, -83.68359375, 220.17657470703125, 791.7328491210938, 141.6527099609375, 212.48550415039062, 291.5326843261719, 8.961044311523438, 218.26771545410156, 181.99099731445312, 275.19940185546875, -126.40489196777344, 306.2717590332031, 640.81884765625, 17.784534454345703, 221.36778259277344, 822.76611328125, 5.058135986328125, 486.57501220703125, 202.82540893554688, -131.41664123535156, 162.69815063476562, 70.23600006103516, 184.17185974121094, 13.41461181640625, 431.84649658203125, 145.2666473388672, 112.42668914794922, -539.1246337890625, 211.64256286621094, 80.05183410644531, -210.7979736328125, 702.416748046875, 70.41307067871094, 569.1314697265625, -105.94847106933594, 796.7926025390625, 223.44418334960938, -132.2314910888672, 190.11544799804688, 191.98387145996094, 400.3057556152344, 864.18994140625, 309.729736328125, 433.332763671875, 314.5976867675781, 437.1253967285156, 533.8206787109375, -80.8762435913086, 235.05960083007812, 14.743724822998047, 177.02346801757812, 48.32305908203125, 352.3133850097656, 581.0902709960938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000587.npy"}
|
|
{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 234.59266662597656, "std": 274.168212890625, "min": -328.1515808105469, "p10": -50.424454498291006, "median": 201.56609344482422, "p90": 594.9149902343752, "max": 1018.4344482421875, "pos_frac": 0.8125, "sample": [246.00534057617188, 265.53167724609375, -54.884132385253906, 41.24848556518555, 327.35992431640625, 499.1080017089844, 434.21270751953125, 34.90594482421875, 479.18695068359375, 618.256103515625, 0.77337646484375, -13.959526062011719, -4.512542724609375, 195.6339111328125, -66.19927978515625, -159.03067016601562, 118.29623413085938, 34.487762451171875, 116.29724884033203, 140.82574462890625, 540.452392578125, 23.706634521484375, -222.20594787597656, 486.4282531738281, 172.24244689941406, 184.95140075683594, 801.3001098632812, 37.265926361083984, -222.3451690673828, 18.86370086669922, 228.61077880859375, 90.99760437011719, 187.4953155517578, 655.2677001953125, 486.69073486328125, 207.49827575683594, 256.512939453125, -38.718955993652344, 436.62225341796875, 358.1263427734375, 654.5411376953125, 274.6884765625, 56.678985595703125, 466.42376708984375, 226.90248107910156, 119.63245391845703, 105.88810729980469, 711.5701293945312, -328.1515808105469, 367.7554626464844, -22.79885482788086, 1018.4344482421875, 971.8799438476562, 349.18914794921875, 388.80609130859375, 356.5249328613281, 287.0914306640625, 234.31158447265625, -107.35356903076172, 115.58416748046875, 179.25967407226562, 214.42349243164062, 469.36138916015625, -40.01853942871094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000588.npy"}
|
|
{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 292.2073669433594, "std": 291.0321350097656, "min": -290.8618469238281, "p10": -42.86943130493163, "median": 262.1709899902344, "p90": 689.1322998046876, "max": 996.97412109375, "pos_frac": 0.84375, "sample": [568.8890991210938, 343.6833801269531, 301.796142578125, 90.755859375, 376.62969970703125, 1.6933708190917969, -22.886184692382812, 592.0119018554688, 260.24017333984375, -63.66912078857422, 23.085281372070312, 55.900909423828125, 169.6358642578125, 283.88397216796875, 171.96923828125, 877.4503173828125, 996.97412109375, -47.11656951904297, 264.101806640625, 108.94393920898438, 494.7249755859375, -78.0245361328125, 104.80418395996094, 786.3243408203125, 121.160888671875, 188.3460693359375, 272.6637268066406, 220.02627563476562, 597.2206420898438, -10.271726608276367, 572.0919189453125, 595.1478881835938, 239.8271484375, 469.9097900390625, 871.8428955078125, -32.959442138671875, -85.00323486328125, 35.118995666503906, 563.2723388671875, 128.98764038085938, 413.16748046875, 48.68646240234375, 309.7395935058594, 408.1083984375, 374.5763244628906, 125.94412994384766, 700.99951171875, 206.61412048339844, 297.9822082519531, 904.298095703125, 661.442138671875, 178.87484741210938, 416.96502685546875, -196.95565795898438, 225.65757751464844, -290.8618469238281, 554.4155883789062, 835.6851806640625, 27.61952018737793, 228.19882202148438, 312.2449645996094, 288.78369140625, -180.6125946044922, 440.5149230957031], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000589.npy"}
|
|
{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 263.3584289550781, "std": 264.7022399902344, "min": -455.013671875, "p10": -68.03897323608398, "median": 265.75494384765625, "p90": 597.7016235351564, "max": 918.778564453125, "pos_frac": 0.84375, "sample": [239.9351043701172, 850.2911376953125, 612.5612182617188, 108.64904022216797, 259.4138488769531, -104.23800659179688, 293.55352783203125, 358.8004150390625, 462.41455078125, -196.49612426757812, 174.2051239013672, -122.89590454101562, 378.1008605957031, 656.0317993164062, 189.0079345703125, 103.37760925292969, -70.07196807861328, 17.284156799316406, 495.65411376953125, 151.0487060546875, 138.01467895507812, -92.46178436279297, -63.295318603515625, 254.39964294433594, -25.949951171875, 143.29833984375, 563.0292358398438, 105.93712615966797, 918.778564453125, -15.331140518188477, 280.499755859375, 367.652587890625, 131.50149536132812, 735.657958984375, 390.1567077636719, 346.8320007324219, 211.10401916503906, 337.291748046875, 480.8045654296875, 272.0960388183594, 281.15740966796875, 77.11517333984375, -167.6310577392578, -455.013671875, 284.992919921875, 52.52961730957031, 366.46734619140625, 363.5801086425781, 213.0485076904297, 196.14443969726562, 425.46661376953125, 520.1397705078125, 197.45828247070312, 72.25501251220703, 117.00503540039062, 650.37255859375, 539.8072509765625, 869.2366943359375, 307.42608642578125, 349.7890930175781, 535.7137451171875, 317.8995056152344, 39.61616134643555, 363.72015380859375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000590.npy"}
|
|
{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 248.6875762939453, "std": 244.32740783691406, "min": -335.581787109375, "p10": -17.243749237060523, "median": 251.52352905273438, "p90": 532.3785217285157, "max": 945.3941650390625, "pos_frac": 0.890625, "sample": [29.814979553222656, 945.3941650390625, 534.6082153320312, 422.55316162109375, 672.18115234375, 5.5170440673828125, 193.46165466308594, -279.5326232910156, 293.3197021484375, 701.31298828125, 407.14508056640625, 183.182373046875, 469.06488037109375, 269.72283935546875, 114.81907653808594, 744.1943359375, 468.6206970214844, 364.8918151855469, 120.38133239746094, 186.46905517578125, 125.42231750488281, 500.54888916015625, 12.22730827331543, 90.40747833251953, 187.66058349609375, 127.1926498413086, -134.8937225341797, 346.18548583984375, 233.32421875, 308.76153564453125, -179.16275024414062, 125.10870361328125, 302.29119873046875, -85.99549865722656, 406.2845764160156, 47.90547180175781, 100.64372253417969, 153.60910034179688, 396.8760986328125, 5.600011825561523, -335.581787109375, 431.9374694824219, 278.3750915527344, 363.4672546386719, -26.998374938964844, 527.1759033203125, 458.0523986816406, 17.790035247802734, 381.34124755859375, 348.861328125, 232.25460815429688, 274.7255554199219, 396.5818176269531, 101.9044189453125, 536.1466064453125, 289.57354736328125, 601.12060546875, 406.2811279296875, -187.71438598632812, 343.7129821777344, 141.95664978027344, 94.46138763427734, 163.50282287597656, 159.95700073242188], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000591.npy"}
|
|
{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 227.9654083251953, "std": 328.8979797363281, "min": -702.28515625, "p10": -193.2630645751953, "median": 269.69478607177734, "p90": 570.0901000976563, "max": 920.0574951171875, "pos_frac": 0.78125, "sample": [173.1613311767578, 573.7828979492188, -107.90829467773438, 288.9873352050781, -324.5067138671875, 50.16548156738281, 359.0626220703125, 627.578857421875, 672.0186157226562, 391.169677734375, -138.72581481933594, 892.8414916992188, 70.04859161376953, 544.8768310546875, -473.6846618652344, -18.17691421508789, 195.87429809570312, 295.2033996582031, -34.993896484375, 920.0574951171875, 118.88860321044922, 434.5951232910156, -230.52743530273438, 594.0640869140625, -84.03578186035156, 75.49649047851562, 177.78695678710938, 543.2975463867188, 560.8033447265625, 368.330078125, 43.40206527709961, -73.43800354003906, 88.01858520507812, 425.8291931152344, 537.9466552734375, 153.83599853515625, 29.7034912109375, 350.074462890625, -176.55789184570312, 410.34222412109375, 250.40223693847656, 146.00027465820312, 408.758056640625, 536.8009643554688, 200.781982421875, -200.42242431640625, 388.250732421875, -702.28515625, 499.3468322753906, 309.049560546875, 451.5115966796875, 138.90675354003906, 561.4735717773438, 458.9642028808594, -283.54449462890625, 524.5139770507812, 453.16815185546875, 168.55438232421875, 584.9459228515625, 147.35845947265625, -639.5610961914062, 365.45379638671875, 370.9130859375, 145.7559814453125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000592.npy"}
|
|
{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 168.91940307617188, "std": 262.4555969238281, "min": -446.08343505859375, "p10": -136.02463226318358, "median": 136.94353485107422, "p90": 478.7528045654297, "max": 938.533203125, "pos_frac": 0.703125, "sample": [-13.886703491210938, 289.2868347167969, -108.18012237548828, -113.71844482421875, -72.3558578491211, -168.16180419921875, 342.42047119140625, -145.5844268798828, 417.6304931640625, 520.9549560546875, 222.57778930664062, 479.739990234375, 498.029052734375, 501.8743591308594, 59.791404724121094, -33.33964538574219, 481.9715576171875, 410.638427734375, 397.92657470703125, 139.2488250732422, -21.653553009033203, 93.047119140625, 110.5472183227539, 65.43177032470703, 266.9361877441406, 938.533203125, 227.82284545898438, 153.73670959472656, 210.7198944091797, -358.3675537109375, -69.68953704833984, 476.4493713378906, -92.14725494384766, 101.28204345703125, 342.36767578125, 462.6097412109375, -60.675872802734375, 611.8482666015625, -10.264411926269531, 71.46884155273438, -446.08343505859375, 468.8072509765625, -222.9549560546875, -311.42999267578125, 96.51210021972656, -65.1151123046875, 23.161157608032227, 134.63824462890625, 409.9401550292969, 427.9656677246094, 249.6221160888672, 173.37249755859375, 357.3915100097656, 328.93359375, 416.00933837890625, 265.38739013671875, 127.48590087890625, 3.8628787994384766, -211.037841796875, 471.920166015625, -10.84661865234375, 78.00743865966797, 41.404170989990234, 377.02166748046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000593.npy"}
|
|
{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 234.74356079101562, "std": 296.00164794921875, "min": -401.75823974609375, "p10": -156.47316131591796, "median": 227.93984985351562, "p90": 681.9151916503906, "max": 1032.0908203125, "pos_frac": 0.796875, "sample": [336.5011901855469, -77.53549194335938, 24.62837028503418, 10.611360549926758, -36.01750946044922, -201.36727905273438, 625.9584350585938, 221.1297607421875, 234.74993896484375, 243.12530517578125, 178.45887756347656, 312.1932067871094, 800.7300415039062, 512.4755249023438, 128.03231811523438, 696.7427978515625, 505.49139404296875, 189.08717346191406, 416.96624755859375, -114.54513549804688, -243.12974548339844, 287.94805908203125, -205.51852416992188, 672.6334838867188, -103.27799987792969, 361.11688232421875, -229.19894409179688, 320.1127624511719, 459.2234191894531, 202.11209106445312, 159.16648864746094, 291.7474670410156, 350.9593505859375, 560.3914184570312, 332.77166748046875, 293.6011962890625, 325.6903381347656, 14.185810089111328, 159.77725219726562, -401.75823974609375, 113.2901611328125, 713.871337890625, 214.05230712890625, 153.27398681640625, 259.4939880371094, 695.83056640625, 626.1072387695312, 257.10064697265625, 101.52542114257812, 316.8118896484375, -160.83822631835938, 25.93255615234375, 68.45547485351562, -260.7851867675781, 379.70849609375, -31.01968765258789, 352.78790283203125, 685.89306640625, 711.9275512695312, 169.74073791503906, 1032.0908203125, -146.2880096435547, 64.73388671875, 63.92076873779297], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000594.npy"}
|
|
{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 227.11721801757812, "std": 263.12054443359375, "min": -210.10670471191406, "p10": -50.791429901123045, "median": 178.67347717285156, "p90": 565.7274902343751, "max": 1213.2879638671875, "pos_frac": 0.796875, "sample": [152.674560546875, 315.80010986328125, 260.8476257324219, 126.1304702758789, -22.22180938720703, 654.4314575195312, 154.46588134765625, 406.2081604003906, -50.229366302490234, 200.646484375, 488.7548828125, 137.70851135253906, 160.504638671875, 577.32568359375, -210.10670471191406, 321.48541259765625, 161.04043579101562, 1085.794677734375, 201.23683166503906, 28.421138763427734, -77.21749877929688, -73.4932861328125, 261.3365173339844, 309.0889892578125, -88.97492218017578, -83.5960693359375, 239.32049560546875, 61.062171936035156, 259.12921142578125, 197.8839111328125, 80.86617279052734, 190.03231811523438, 136.38714599609375, 165.09515380859375, 596.2091064453125, 95.53746032714844, 184.73385620117188, 113.56474304199219, 477.6363525390625, 402.807373046875, -14.675979614257812, -51.03231430053711, 276.0995178222656, -160.45382690429688, 190.69674682617188, 163.2007598876953, 218.04270935058594, -40.00968933105469, 341.1951599121094, 476.4953918457031, -9.321113586425781, 605.6783447265625, 13.618904113769531, 55.40251159667969, 243.54705810546875, 172.61309814453125, 538.6650390625, 68.72625732421875, -0.012237548828125, 112.97998046875, 648.1358642578125, 389.6448669433594, 484.64886474609375, 1213.2879638671875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000595.npy"}
|
|
{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 196.3986358642578, "std": 277.86737060546875, "min": -562.126708984375, "p10": -151.1480972290039, "median": 154.7069091796875, "p90": 590.3395324707031, "max": 946.3042602539062, "pos_frac": 0.796875, "sample": [-175.34808349609375, -29.10528564453125, 242.66180419921875, 33.54212951660156, 364.0902099609375, 477.4256286621094, 48.331871032714844, 419.95025634765625, 323.6198425292969, 161.9150390625, 600.8270874023438, 114.79403686523438, -152.75714111328125, 349.1309814453125, 605.2239379882812, 592.252197265625, 128.92909240722656, 122.20433044433594, 430.76788330078125, 327.05865478515625, -64.20173645019531, 473.53076171875, 248.9374542236328, 172.8861846923828, -189.64816284179688, 591.2542724609375, 72.78931427001953, 219.80239868164062, 235.58653259277344, 82.78727722167969, -147.39366149902344, 125.3679428100586, -53.339805603027344, 849.1075439453125, 141.63021850585938, 442.3792724609375, 695.871826171875, 322.6674499511719, 159.25802612304688, 14.535316467285156, 213.5711669921875, 59.206077575683594, 127.57264709472656, 124.15387725830078, 73.40886688232422, 394.2459716796875, 588.2051391601562, 132.2419891357422, -65.76258087158203, 109.58317565917969, 480.1475524902344, 16.074209213256836, -562.126708984375, 150.15579223632812, 177.85174560546875, -263.0110778808594, 68.81196594238281, -52.7103385925293, 349.755126953125, 266.5010681152344, 946.3042602539062, -159.77560424804688, 369.5183410644531, -353.73309326171875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000596.npy"}
|
|
{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 261.7131652832031, "std": 283.9962463378906, "min": -383.59674072265625, "p10": -44.62995223999023, "median": 236.75540161132812, "p90": 558.0184936523438, "max": 1150.5526123046875, "pos_frac": 0.859375, "sample": [336.4338073730469, 240.40115356445312, 313.1658020019531, 235.0529022216797, 232.90081787109375, 890.1312255859375, -2.1509056091308594, 516.3477783203125, 377.5741271972656, 175.0500030517578, -72.90544891357422, 2.998260498046875, 88.1074447631836, 49.611629486083984, 456.45477294921875, 207.93017578125, 422.5303955078125, 198.3939971923828, 345.8091125488281, 683.8662109375, 238.45790100097656, 317.13531494140625, 120.64535522460938, 545.69287109375, 126.93791198730469, 154.95150756835938, 22.224178314208984, 82.70439147949219, 80.15574645996094, -383.59674072265625, -373.11688232421875, -61.049957275390625, 360.13751220703125, 177.9302520751953, 360.9071960449219, 1017.464599609375, 451.0740966796875, -44.00706481933594, -84.13114929199219, 177.45950317382812, 480.9490051269531, 472.078369140625, 120.8337173461914, 329.15771484375, 73.01708221435547, 244.83987426757812, 294.30718994140625, 431.8470458984375, 53.84080505371094, 563.3009033203125, 133.5479736328125, -183.94622802734375, -44.89690399169922, 1150.5526123046875, 790.9071044921875, 135.2233428955078, 363.65728759765625, 257.84246826171875, 507.2557067871094, 600.1577758789062, 2.1171112060546875, 498.6902770996094, 109.0990982055664, 379.5843200683594], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000597.npy"}
|
|
{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 228.59048461914062, "std": 273.4523010253906, "min": -533.2032470703125, "p10": -70.63161315917968, "median": 221.00003051757812, "p90": 553.1660034179688, "max": 776.2029418945312, "pos_frac": 0.828125, "sample": [153.98927307128906, 538.6754150390625, 150.62010192871094, 543.3226318359375, -390.1524353027344, 405.1922302246094, 490.3515625, 438.51593017578125, 112.916748046875, 547.229248046875, 75.04165649414062, -341.2785339355469, -533.2032470703125, 41.45512390136719, 532.8846435546875, -27.128183364868164, 594.477294921875, 292.54730224609375, 776.2029418945312, 351.7762756347656, 160.3165283203125, 513.4068603515625, 555.7103271484375, 134.90872192382812, 178.09352111816406, 387.5043029785156, 399.46533203125, 60.059200286865234, -419.83734130859375, 18.431406021118164, 262.5239562988281, 209.02926635742188, 232.97079467773438, -79.11710357666016, -72.15327453613281, 392.5003662109375, 52.98964309692383, 532.0384521484375, 133.5124053955078, 245.16571044921875, 131.29080200195312, 174.94296264648438, -216.30874633789062, 387.9769592285156, -67.08106994628906, 598.804443359375, 122.38487243652344, 258.1886901855469, 125.08415222167969, 150.5399169921875, 361.35467529296875, 606.6279296875, 557.752197265625, 337.216064453125, 412.53912353515625, 126.47870635986328, 265.8111572265625, 527.0596923828125, 409.71197509765625, 86.39187622070312, 56.45176696777344, -3.2598628997802734, 614.6185913085938, -45.74216842651367], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000598.npy"}
|
|
{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 209.42332458496094, "std": 214.16954040527344, "min": -350.1422424316406, "p10": -83.10486526489255, "median": 218.21429443359375, "p90": 485.1252075195313, "max": 692.7730102539062, "pos_frac": 0.84375, "sample": [267.6372375488281, 308.28167724609375, 152.1824493408203, 201.18365478515625, 250.9560546875, 403.59136962890625, 398.2457275390625, 134.16558837890625, 299.18560791015625, 345.01885986328125, 488.9814453125, 48.72150421142578, 8.939657211303711, 165.62973022460938, 1.0862483978271484, 196.6610565185547, 354.4487609863281, 257.6690673828125, 249.74151611328125, 476.1273193359375, 119.34783935546875, 359.16802978515625, 329.9495544433594, 263.5451354980469, 215.4586944580078, -157.63246154785156, 133.3021240234375, 244.69313049316406, 215.49778747558594, -350.1422424316406, 548.5261840820312, 63.43434143066406, 489.5257568359375, 352.53515625, 489.00335693359375, 492.3961181640625, 400.26556396484375, -44.207611083984375, 601.9566650390625, -40.065330505371094, 248.06985473632812, -112.24969482421875, 471.8769226074219, 95.44401550292969, 252.3685760498047, 315.4100036621094, 407.5303649902344, -95.84371185302734, 216.84544372558594, 203.8489990234375, 396.6497802734375, 58.427452087402344, -109.93212127685547, 12.645011901855469, -307.2995300292969, 143.46591186523438, 289.99066162109375, 176.44815063476562, 219.58314514160156, 692.7730102539062, -53.380889892578125, -235.41864013671875, 165.09271240234375, 215.73452758789062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000599.npy"}
|
|
{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 222.00851440429688, "std": 286.5669250488281, "min": -418.77777099609375, "p10": -96.41036605834958, "median": 197.79842376708984, "p90": 631.0073852539064, "max": 856.615478515625, "pos_frac": 0.828125, "sample": [114.7999038696289, 200.0798797607422, 398.18865966796875, -364.6869812011719, 497.2518615722656, 348.78704833984375, 21.76511001586914, 345.68951416015625, -65.80376434326172, 425.4386901855469, 111.9497299194336, 234.02871704101562, 566.4144287109375, 55.411720275878906, 143.88497924804688, 578.6668701171875, -169.7115478515625, 646.8421630859375, 45.39665985107422, 135.58828735351562, 435.0757751464844, 660.12060546875, 38.49888610839844, 202.01351928710938, 71.49299621582031, -43.88267517089844, 409.64715576171875, 856.615478515625, 738.1414794921875, 195.5169677734375, 192.4576873779297, 321.7328186035156, 594.0595703125, 206.60768127441406, 690.99755859375, 297.4806823730469, -109.52748107910156, 206.0146942138672, 16.2583065032959, 443.41571044921875, 828.261474609375, 115.61628723144531, -57.31236267089844, 80.0974349975586, 707.7229614257812, 132.39352416992188, 511.65264892578125, 200.80552673339844, -219.0652313232422, 251.98635864257812, 140.2129669189453, 296.54986572265625, 393.04638671875, 64.44371795654297, 235.4497528076172, 548.3535766601562, -345.8202819824219, 78.85934448242188, 57.249839782714844, -249.92715454101562, -418.77777099609375, 73.16813659667969, -65.01764678955078, 155.87742614746094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000600.npy"}
|
|
{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 242.435791015625, "std": 277.4962463378906, "min": -332.72625732421875, "p10": -100.53205337524412, "median": 244.90296173095703, "p90": 598.7515075683593, "max": 1004.8436279296875, "pos_frac": 0.828125, "sample": [269.9934387207031, 499.72723388671875, 1004.8436279296875, 436.7630615234375, -18.207080841064453, 597.6043090820312, 264.5387878417969, 197.2431640625, -29.624717712402344, 347.5560302734375, -111.5962142944336, -323.22705078125, 609.4549560546875, 344.79156494140625, 215.31777954101562, 354.5592041015625, 94.59342193603516, 352.34307861328125, 163.29220581054688, 891.4046020507812, 353.04937744140625, 409.6903076171875, 32.40655517578125, 90.35935974121094, 226.14022827148438, 275.4130859375, 13.15583610534668, 254.95516967773438, 673.8653564453125, 151.14166259765625, 175.69760131835938, 146.44956970214844, -324.7159729003906, 186.24169921875, -144.87393188476562, 414.8096923828125, 50.806800842285156, 241.2324981689453, 76.68118286132812, 248.57342529296875, 181.0438232421875, 444.2895812988281, 107.95233917236328, -277.8914794921875, 170.9364471435547, -332.72625732421875, 33.12457275390625, 848.493408203125, 435.6199645996094, -64.60051727294922, 385.4060363769531, 322.1402587890625, 76.80448913574219, 277.3610534667969, -107.28486633300781, 688.2724609375, -84.7754898071289, 599.2431640625, 496.2740173339844, 407.08172607421875, 356.94818115234375, 164.72044372558594, 389.275390625, 285.7310791015625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000601.npy"}
|
|
{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 279.1749267578125, "std": 258.6432800292969, "min": -262.65313720703125, "p10": -13.928692626953097, "median": 206.96327209472656, "p90": 599.9614196777344, "max": 942.4072875976562, "pos_frac": 0.890625, "sample": [181.7069091796875, 182.864501953125, 23.763286590576172, 120.843505859375, 204.66098022460938, 154.0203857421875, 586.4737548828125, 425.56298828125, 153.2959442138672, 160.4659881591797, 291.86474609375, 212.54734802246094, 209.26556396484375, 343.4822998046875, 785.4957275390625, 81.92317962646484, 600.8102416992188, 197.31106567382812, 374.4829406738281, 597.9808349609375, 12.986572265625, 169.2784423828125, -25.46380615234375, -262.65313720703125, 171.98597717285156, 378.1251220703125, 382.84625244140625, 286.3031005859375, -175.95794677734375, -30.26390266418457, 42.794639587402344, 439.5367126464844, 361.9990234375, 113.84657287597656, 275.1291809082031, 530.0332641601562, 174.28854370117188, 372.77349853515625, 160.65768432617188, -209.42605590820312, 803.1329345703125, 444.2346496582031, 436.466552734375, 294.2213134765625, 508.86273193359375, 483.8333740234375, 942.4072875976562, 33.17546081542969, 481.12115478515625, -54.87223815917969, 631.0914306640625, 440.0339050292969, 190.98934936523438, 143.54124450683594, 759.7689208984375, 89.09954833984375, -27.18999481201172, 348.52850341796875, 82.22346496582031, 132.0760498046875, 495.18035888671875, 929.7981567382812, 203.41781616210938, 18.411697387695312], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000602.npy"}
|
|
{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 242.5406494140625, "std": 292.62542724609375, "min": -285.736572265625, "p10": -113.5457122802734, "median": 185.1520233154297, "p90": 642.310760498047, "max": 1075.4404296875, "pos_frac": 0.78125, "sample": [-85.14544677734375, -127.82542419433594, 334.97308349609375, 32.80951690673828, 691.8433227539062, -3.4736328125, 536.2611694335938, 65.89045715332031, 313.026123046875, 518.5864868164062, 139.93783569335938, 268.4847106933594, -253.03904724121094, 412.41552734375, 46.298702239990234, 208.77487182617188, 37.75372314453125, 594.69189453125, 569.2796630859375, 182.02545166015625, 741.1022338867188, 381.75653076171875, 308.2088317871094, 24.462909698486328, -0.2271575927734375, 218.8372802734375, 422.39910888671875, 152.5001983642578, 63.677310943603516, 156.4840087890625, 653.415283203125, -285.736572265625, 574.4027099609375, 123.55987548828125, 13.9345703125, 412.630126953125, 616.4002075195312, -133.84835815429688, 175.7427215576172, 250.721435546875, 395.3971862792969, 91.74188995361328, -23.226821899414062, 34.64614486694336, -46.238121032714844, 327.09368896484375, 195.9667510986328, 150.2820587158203, -163.6162109375, 230.125732421875, -52.507041931152344, 573.0518798828125, 445.90411376953125, -125.71725463867188, 188.27859497070312, -144.4500732421875, 1075.4404296875, 154.09722900390625, 860.828125, 11.224029541015625, 747.5049438476562, 588.124755859375, 676.3232421875, -21.66592025756836], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000603.npy"}
|
|
{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 279.1767578125, "std": 278.9923400878906, "min": -473.6326599121094, "p10": -41.83755416870117, "median": 276.94358825683594, "p90": 595.8529846191407, "max": 900.4811401367188, "pos_frac": 0.84375, "sample": [-44.024742126464844, 157.677490234375, 6.567512512207031, -50.27885437011719, 458.76641845703125, 362.2663269042969, 528.6832885742188, 531.8560791015625, 226.2960662841797, 364.90692138671875, -326.6573791503906, 592.368408203125, 207.7165985107422, -65.50980377197266, 567.8623046875, 431.546875, 390.91314697265625, 63.91964340209961, 277.70849609375, 449.282958984375, 183.72637939453125, -281.12054443359375, 119.04051208496094, 597.3463745117188, 120.68126678466797, -473.6326599121094, 680.2962646484375, -36.73411560058594, 171.25015258789062, 38.7459716796875, -103.54695892333984, 232.46702575683594, 213.37306213378906, 483.1578063964844, 214.30545043945312, 782.5762939453125, 427.97601318359375, 7.168403625488281, 152.21949768066406, 301.9106140136719, 487.36163330078125, 359.78778076171875, 329.4815979003906, 124.63652801513672, 502.1977233886719, 781.5308227539062, 151.9410858154297, 900.4811401367188, 305.341796875, 174.7468719482422, 541.4893798828125, 580.797607421875, 472.81646728515625, 148.64398193359375, 648.0675659179688, 276.1786804199219, -23.90772247314453, 22.8326416015625, 190.2701416015625, 348.91448974609375, -22.78857421875, 352.97711181640625, 884.837158203125, 365.6024475097656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000604.npy"}
|
|
{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 232.87347412109375, "std": 291.23577880859375, "min": -432.0559387207031, "p10": -64.45712966918944, "median": 209.50896453857422, "p90": 571.416766357422, "max": 1473.9625244140625, "pos_frac": 0.84375, "sample": [32.34950256347656, -59.686729431152344, 5.962554931640625, -12.313726425170898, 215.1572265625, 95.14002990722656, 203.86070251464844, 418.6309814453125, 527.9248657226562, 140.18954467773438, 407.7654113769531, -37.515899658203125, 59.719207763671875, -92.38311004638672, 246.67747497558594, 242.174072265625, 133.12937927246094, 658.0341186523438, 861.579833984375, 3.0643768310546875, 1473.9625244140625, 142.92611694335938, 187.7333984375, 190.9520263671875, 295.65289306640625, 241.5191650390625, 70.4140625, 259.8724060058594, 36.24677658081055, 250.90631103515625, 93.26985931396484, -163.34031677246094, 454.5461120605469, -209.29287719726562, -71.87969970703125, 333.9184265136719, 714.3453369140625, 84.94284057617188, 135.39691162109375, 590.05615234375, 89.19065856933594, 954.3271484375, -155.80198669433594, -432.0559387207031, 454.00006103515625, 253.3174591064453, -66.5015869140625, 462.2433776855469, 238.38360595703125, 139.10464477539062, 287.0318298339844, 261.233642578125, 367.4894714355469, 245.76699829101562, 151.04576110839844, 218.68380737304688, 135.26681518554688, 247.4046630859375, 237.3942108154297, 57.73078918457031, 489.0865478515625, 333.9771423339844, 166.30419921875, 607.6701049804688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000605.npy"}
|
|
{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 245.47132873535156, "std": 260.6863708496094, "min": -293.81390380859375, "p10": -37.56775665283203, "median": 223.77615356445312, "p90": 560.9674316406251, "max": 986.8348999023438, "pos_frac": 0.84375, "sample": [-68.357421875, -52.363800048828125, 342.09429931640625, -173.09738159179688, 282.9956970214844, 500.00579833984375, 26.95901870727539, 127.91778564453125, 459.62176513671875, 613.1205444335938, 100.9283447265625, 935.318603515625, 495.4156494140625, 20.35528564453125, 29.470794677734375, 108.98675537109375, 208.72439575195312, 100.24735260009766, 224.80654907226562, 512.7598876953125, 309.5995178222656, -167.64697265625, 652.0614624023438, 14.796195983886719, 716.2113037109375, 367.0577392578125, 258.05853271484375, 80.37245178222656, 22.735939025878906, -87.19453430175781, 239.1063995361328, 485.1090393066406, -35.8011474609375, 143.96963500976562, -293.81390380859375, 74.80751037597656, 222.74575805664062, 255.75552368164062, -3.2338619232177734, -38.32487487792969, 22.561416625976562, 216.525634765625, 986.8348999023438, 417.61590576171875, 176.09619140625, 275.2922668457031, 269.5133056640625, 427.90350341796875, 401.9310302734375, 179.63873291015625, 114.80445861816406, 579.23828125, -27.973342895507812, 251.84825134277344, 381.891357421875, 797.1146240234375, 21.562759399414062, 441.47613525390625, 79.67192840576172, 332.74664306640625, 518.33544921875, 279.4180908203125, 429.21099853515625, 124.62533569335938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000606.npy"}
|
|
{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 205.24591064453125, "std": 240.4586181640625, "min": -371.6351318359375, "p10": -79.41990051269529, "median": 174.52132415771484, "p90": 522.9697326660157, "max": 731.2630615234375, "pos_frac": 0.796875, "sample": [284.243896484375, 137.0318145751953, 299.94146728515625, 104.29875183105469, 54.60271453857422, 128.35252380371094, 663.8453369140625, 439.36065673828125, 7.3018951416015625, 662.2841796875, -130.54388427734375, 328.9431457519531, 239.41983032226562, -44.32312774658203, 75.68109130859375, 731.2630615234375, -35.0056266784668, 35.09264373779297, 92.69559478759766, 45.389259338378906, -85.88516235351562, -116.72367858886719, 98.36321258544922, 339.77923583984375, -163.11865234375, 457.1673583984375, -46.90644073486328, 461.85870361328125, 392.3007507324219, 111.47383117675781, 360.0000305175781, 53.38898468017578, 531.6284790039062, 126.7503662109375, 466.79052734375, 383.109375, 494.19940185546875, -64.33428955078125, 535.257080078125, 155.59103393554688, -371.6351318359375, 351.8680419921875, -115.30380249023438, 457.82916259765625, 285.7479553222656, 82.61300659179688, -28.613853454589844, 278.71856689453125, -136.20913696289062, 220.705810546875, 174.90716552734375, 243.84457397460938, 18.226837158203125, 235.345458984375, 0.6042709350585938, 2.6302032470703125, 365.7611389160156, 696.1839599609375, 554.7449340820312, 502.7659912109375, 174.13548278808594, -35.638877868652344, 360.9035339355469, 205.0369415283203], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000607.npy"}
|
|
{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 234.29241943359375, "std": 228.80787658691406, "min": -211.95701599121094, "p10": -61.110523223876946, "median": 229.91390991210938, "p90": 525.1956298828126, "max": 930.4305419921875, "pos_frac": 0.828125, "sample": [433.9396667480469, 477.90997314453125, 129.455078125, 181.43865966796875, -57.31110382080078, 448.9862060546875, 101.0028305053711, 52.51778030395508, 278.71942138671875, 401.5989685058594, 451.63250732421875, -100.53053283691406, 97.72716522216797, 403.651611328125, -62.73884582519531, 186.8725128173828, -191.54946899414062, 510.24005126953125, 46.634185791015625, 356.8219909667969, 398.9034729003906, 262.69818115234375, 69.14398193359375, 323.93145751953125, -128.97152709960938, 263.56658935546875, 296.0249938964844, 329.0122985839844, 560.7347412109375, -56.28633117675781, 57.25771713256836, -26.372482299804688, 191.70555114746094, 206.7796173095703, 154.0475616455078, 253.04820251464844, 110.09333801269531, 74.34226989746094, 538.64453125, -23.78478240966797, 930.4305419921875, 278.1855773925781, 537.5813598632812, 192.22418212890625, 383.2437744140625, 141.86407470703125, 18.45177459716797, 379.9878845214844, 346.6919860839844, 97.12165832519531, 488.8136291503906, -91.96300506591797, 333.0505065917969, 508.26495361328125, -211.95701599121094, 531.6051635742188, -64.90278625488281, 76.73501586914062, 606.8072509765625, 437.3515625, 337.7414245605469, 554.05126953125, 161.1096649169922, 20.685546875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000608.npy"}
|
|
{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 272.02984619140625, "std": 277.5064392089844, "min": -515.054443359375, "p10": -0.9657009124755636, "median": 253.47393798828125, "p90": 552.7352416992187, "max": 1216.92236328125, "pos_frac": 0.890625, "sample": [175.00457763671875, 202.59597778320312, 91.9301528930664, -204.4089813232422, 240.68003845214844, 346.5436096191406, 553.2843017578125, 288.1665344238281, 303.7279357910156, 25.1917724609375, 507.3251037597656, 551.4541015625, 73.15623474121094, 311.8404541015625, 419.38330078125, -515.054443359375, -11.599029541015625, 311.45574951171875, 385.6427307128906, 134.81878662109375, 202.31942749023438, 247.21527099609375, 235.13949584960938, 436.4147033691406, 80.8930435180664, 417.2499084472656, 788.5924072265625, 133.65805053710938, 77.36898803710938, 335.86627197265625, 174.22882080078125, 333.763671875, 893.0578002929688, 419.848876953125, 1216.92236328125, 640.3914794921875, -46.39319610595703, 368.87255859375, 42.342933654785156, 267.5653381347656, 194.30715942382812, 272.35430908203125, 365.53173828125, 1111.192626953125, 344.7049560546875, 259.73260498046875, 307.68402099609375, -77.74571228027344, -107.48143005371094, 111.53801727294922, 296.22711181640625, 144.90927124023438, 223.80641174316406, 90.51197814941406, 20.44397735595703, 237.6192626953125, 189.87118530273438, 77.49755859375, 502.0547790527344, 676.515380859375, 45.54869842529297, 409.3311767578125, 267.43939208984375, -10.141277313232422], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000609.npy"}
|
|
{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 208.34732055664062, "std": 244.150634765625, "min": -352.5816345214844, "p10": -110.65841522216796, "median": 225.34882354736328, "p90": 527.4266479492188, "max": 762.3944702148438, "pos_frac": 0.78125, "sample": [216.99990844726562, 399.7818298339844, 275.1536865234375, 237.09413146972656, -129.93826293945312, 304.2835693359375, 57.97727966308594, 296.25555419921875, 249.9654541015625, -113.44476318359375, 61.56446075439453, 282.29937744140625, -3.5373153686523438, 638.69677734375, 579.1158447265625, 268.27008056640625, 158.72305297851562, 156.55662536621094, -231.50784301757812, -135.4441375732422, 156.3289337158203, 532.6796875, 172.69830322265625, 588.970703125, -193.96533203125, 236.11520385742188, 95.51214599609375, -20.893295288085938, 646.8742065429688, 498.3226013183594, 753.45166015625, -9.948959350585938, 44.597434997558594, 515.1695556640625, 204.84136962890625, 364.9846496582031, -32.502037048339844, 275.0681457519531, 116.1234359741211, 378.40020751953125, 349.71612548828125, 144.65528869628906, -104.15693664550781, 108.80918884277344, 357.4606628417969, 154.56466674804688, 410.47052001953125, 130.71914672851562, -102.47055053710938, 334.577880859375, 26.926025390625, 430.90142822265625, 132.04066467285156, 15.514209747314453, 405.960693359375, 271.8177490234375, 269.2315368652344, 243.774658203125, -47.87647247314453, 233.69773864746094, -228.32135009765625, -352.5816345214844, 494.71014404296875, 762.3944702148438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000610.npy"}
|
|
{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 257.668701171875, "std": 263.00653076171875, "min": -252.81439208984375, "p10": -64.99239349365232, "median": 228.68619537353516, "p90": 639.1025268554688, "max": 792.2498168945312, "pos_frac": 0.84375, "sample": [461.4204406738281, 190.0165557861328, 792.2498168945312, 659.763671875, 471.117431640625, 309.2808532714844, -20.051124572753906, 497.95098876953125, 155.79458618164062, 192.34274291992188, 248.17474365234375, 636.71875, -138.11680603027344, 60.74312210083008, -82.98651123046875, -4.764444351196289, 10.125886917114258, -201.9249725341797, 191.00997924804688, 640.1241455078125, -252.81439208984375, 528.0797729492188, 462.9254150390625, -187.96429443359375, 488.5137634277344, 384.8204345703125, 222.07122802734375, 743.2277221679688, 260.1367492675781, 158.3534698486328, 45.3349609375, 563.3673095703125, 271.009033203125, 7.534875869750977, 125.9384765625, 439.2294921875, 136.69337463378906, 145.17091369628906, -74.4854736328125, 249.70034790039062, 424.4964599609375, 266.71600341796875, -42.84187316894531, 694.0443725585938, 247.7530975341797, 263.83953857421875, 53.294105529785156, 215.68533325195312, 520.0964965820312, 69.40267181396484, 421.12371826171875, -182.55618286132812, 153.43975830078125, 226.03179931640625, 95.11572265625, 59.6195068359375, 418.8619689941406, 518.1755981445312, 231.34059143066406, 567.5276489257812, 735.2155151367188, 8.457344055175781, 722.0001831054688, 18.12551498413086], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000611.npy"}
|
|
{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 272.8205871582031, "std": 236.8634033203125, "min": -230.5735626220703, "p10": 6.8601711273193375, "median": 282.6349182128906, "p90": 608.5134033203126, "max": 850.222900390625, "pos_frac": 0.90625, "sample": [655.6754150390625, 334.3136901855469, 398.9566345214844, -202.38516235351562, 18.61300277709961, 186.2458953857422, 521.6406860351562, 313.27801513671875, 176.95289611816406, -21.82630729675293, -124.46536254882812, 612.4962768554688, 128.4735107421875, 756.696044921875, 384.24481201171875, 544.3307495117188, 442.2331848144531, 207.9957275390625, 287.85174560546875, 598.8914794921875, 440.7082824707031, 108.02942657470703, -230.5735626220703, 474.0574951171875, 634.5026245117188, 47.24835205078125, 270.7606506347656, -44.8525390625, 347.3428649902344, 474.3756103515625, 388.3438720703125, 80.53616333007812, 129.57887268066406, 128.87850952148438, 407.1749572753906, 6.113697052001953, 291.58538818359375, 244.17132568359375, 117.29052734375, 95.69940185546875, 148.12986755371094, 277.4180908203125, 50.5413818359375, 210.62533569335938, 632.7974243164062, 51.519718170166016, 596.0948486328125, 368.29266357421875, 850.222900390625, 289.86383056640625, 70.30305480957031, 389.8755798339844, 623.5263671875, 108.30953216552734, 8.601943969726562, 66.90406799316406, 376.98736572265625, 46.45921325683594, 339.21502685546875, 95.70887756347656, 599.2200317382812, 304.625244140625, 329.3385314941406, -5.24786376953125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000612.npy"}
|
|
{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 253.05880737304688, "std": 221.54954528808594, "min": -254.45803833007812, "p10": 26.385455513000494, "median": 232.0474624633789, "p90": 528.0387451171875, "max": 968.3555908203125, "pos_frac": 0.953125, "sample": [400.54425048828125, 327.04827880859375, 421.4737548828125, -33.91340255737305, 342.30303955078125, 329.544921875, 93.59724426269531, 305.1745910644531, 389.7712097167969, 59.32538604736328, 575.3289794921875, 48.941856384277344, 351.4457092285156, 124.19588470458984, 796.9536743164062, 152.7337188720703, -34.78987121582031, 511.24993896484375, 202.3493194580078, 62.09367370605469, 332.0223388671875, 65.53267669677734, 312.1473083496094, 504.18402099609375, 453.84710693359375, 228.54774475097656, 535.2339477539062, 457.50927734375, 44.99237060546875, 279.39678955078125, 308.88275146484375, 106.15530395507812, 23.997045516967773, 427.92596435546875, 161.3009490966797, 560.8359375, 140.10301208496094, 304.62384033203125, 968.3555908203125, 1.9190139770507812, 31.958412170410156, 557.3094482421875, 341.1653747558594, 101.95873260498047, 149.0478515625, 0.306304931640625, 94.52412414550781, 784.0637817382812, 13.950260162353516, 140.18341064453125, 130.00669860839844, 136.59860229492188, -254.45803833007812, 235.54718017578125, 426.67340087890625, 270.5649719238281, 148.6068115234375, 262.5100402832031, 32.98400115966797, 294.1845397949219, 51.25090026855469, 135.6544189453125, 416.5635681152344, 51.72364807128906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000613.npy"}
|
|
{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 258.2608642578125, "std": 262.0122985839844, "min": -388.2573547363281, "p10": -110.55160903930661, "median": 262.15421295166016, "p90": 605.2354248046876, "max": 969.721435546875, "pos_frac": 0.8125, "sample": [106.98963165283203, 461.9990234375, -78.42892456054688, 479.0779113769531, 181.40724182128906, 581.2301635742188, 62.37167739868164, 327.26031494140625, 251.60804748535156, -46.50301742553711, 434.0037841796875, 93.30146789550781, 304.1382141113281, 289.77899169921875, 272.70037841796875, -130.37860107421875, 451.321533203125, 585.7688598632812, 222.82447814941406, 7.318950653076172, 606.979248046875, 4.282318115234375, 241.16481018066406, 374.1019287109375, -155.66351318359375, -39.25926971435547, 324.286865234375, 178.39785766601562, 285.3050537109375, -45.76387023925781, 231.56350708007812, 601.16650390625, -141.11587524414062, 274.02325439453125, -160.83456420898438, -2.6989212036132812, 352.20904541015625, -128.9027557373047, 168.80970764160156, 450.30133056640625, 472.8261413574219, 100.98017883300781, 553.4271240234375, 198.1129150390625, 610.8170166015625, 651.7593383789062, 319.40582275390625, 190.1177978515625, 338.14276123046875, 456.8105773925781, 135.3045654296875, 164.87176513671875, 531.6435546875, 101.68890380859375, 615.898193359375, 969.721435546875, 637.1326293945312, 91.96551513671875, -124.31847381591797, -388.2573547363281, 693.4075317382812, 205.83038330078125, 294.7316589355469, 430.5314636230469], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000614.npy"}
|
|
{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 207.64224243164062, "std": 241.3807373046875, "min": -400.00726318359375, "p10": -27.62821979522705, "median": 177.11316680908203, "p90": 471.12085876464846, "max": 1006.5362548828125, "pos_frac": 0.859375, "sample": [81.61178588867188, 505.72906494140625, 91.36141967773438, 455.91290283203125, 108.53425598144531, 440.6642150878906, 23.724594116210938, 53.05481719970703, 67.5430908203125, 76.98086547851562, 503.8264465332031, 176.84815979003906, 177.378173828125, -28.5526123046875, 390.9869384765625, -181.57064819335938, 93.87921142578125, -159.60247802734375, 231.0420684814453, 234.34933471679688, 157.77142333984375, 80.99380493164062, 802.269287109375, 55.85918426513672, 64.78828430175781, 427.8788757324219, 65.5954360961914, 450.2535400390625, 316.1767578125, 182.63169860839844, 226.20046997070312, 289.90350341796875, 365.81396484375, 266.09466552734375, 210.33489990234375, -21.736595153808594, 255.74647521972656, 129.4129638671875, -46.42192077636719, 8.875268936157227, -400.00726318359375, 459.067626953125, 228.44717407226562, 23.567779541015625, 476.2865295410156, 378.1842956542969, 183.43917846679688, 750.3169555664062, 113.7650146484375, 357.1634216308594, 378.565185546875, -126.90751647949219, 213.5803985595703, 37.376922607421875, 33.726463317871094, -25.471303939819336, 152.19448852539062, 259.84161376953125, 297.7279357910156, 704.5387573242188, -92.20423889160156, 1006.5362548828125, 160.1838836669922, 87.0401611328125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000615.npy"}
|
|
{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 163.54129028320312, "std": 255.60948181152344, "min": -311.0933532714844, "p10": -202.66647338867182, "median": 133.62887573242188, "p90": 517.8963623046875, "max": 1037.3125, "pos_frac": 0.75, "sample": [451.47601318359375, -12.816925048828125, 128.400146484375, 366.4791259765625, 208.94882202148438, 80.14502716064453, 104.991455078125, -101.10995483398438, 179.58871459960938, 665.6932983398438, 539.091552734375, 333.6689758300781, 65.9690170288086, 197.71775817871094, -29.565536499023438, 194.8167266845703, 211.22286987304688, 160.62896728515625, 185.31109619140625, -50.852806091308594, 625.1986083984375, 407.9592590332031, 28.295501708984375, 47.140174865722656, -242.41622924804688, -276.38800048828125, 272.4424743652344, 158.70938110351562, 73.0590591430664, 536.8123779296875, 1037.3125, 113.17127227783203, -311.0933532714844, 133.94967651367188, 14.001934051513672, -236.17262268066406, -38.288414001464844, -6.291225433349609, 133.30807495117188, 386.4686279296875, 232.1493377685547, 301.97509765625, 96.04450988769531, 73.26599884033203, -222.90859985351562, 349.3072204589844, 139.9447784423828, -33.95115661621094, 70.70392608642578, 380.99169921875, 59.26454162597656, -155.43484497070312, 507.838134765625, 85.10655212402344, 356.4930419921875, 116.36453247070312, 402.2666015625, 188.75746154785156, -55.75811767578125, 229.42648315429688, 522.20703125, -245.74278259277344, 557.2806396484375, -225.93215942382812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000616.npy"}
|
|
{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 160.45773315429688, "std": 242.84188842773438, "min": -308.43292236328125, "p10": -101.41544494628906, "median": 125.22352600097656, "p90": 434.00452880859393, "max": 952.8743896484375, "pos_frac": 0.734375, "sample": [111.65925598144531, 107.05401611328125, -104.16064453125, 339.1839904785156, 58.63792419433594, -113.68370819091797, 48.45439910888672, -285.6794128417969, 380.6244201660156, 63.34675598144531, -3.3385848999023438, 156.26956176757812, 284.5392150878906, 395.1191101074219, 62.61912155151367, 173.38795471191406, -13.827425003051758, 393.91064453125, 23.01629638671875, 613.3306274414062, 380.7327880859375, 210.34078979492188, 527.8836669921875, -9.485605239868164, 182.4084014892578, 326.63623046875, 35.465171813964844, -68.67176818847656, 349.795654296875, 9.716781616210938, 4.820283889770508, 323.26690673828125, -104.93690490722656, -39.469749450683594, 317.05780029296875, 265.0549621582031, 361.75048828125, 286.8681640625, 315.7503356933594, 279.93548583984375, 162.8576202392578, -167.34979248046875, 470.2844543457031, 364.8198547363281, 859.674560546875, 456.81671142578125, 138.7877960205078, -256.37530517578125, 94.04238891601562, 185.0093536376953, -308.43292236328125, 21.567428588867188, 28.70745086669922, 152.55789184570312, 105.7715072631836, -79.76350402832031, 450.6697082519531, -19.569061279296875, -48.803672790527344, -40.376129150390625, 952.8743896484375, 183.09283447265625, 12.058670043945312, -95.00997924804688], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000617.npy"}
|
|
{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 245.88577270507812, "std": 262.4249267578125, "min": -308.7257385253906, "p10": -104.3266258239746, "median": 243.08921813964844, "p90": 587.6097656250001, "max": 860.14208984375, "pos_frac": 0.828125, "sample": [-308.7257385253906, 382.5380554199219, 207.75006103515625, 125.66971588134766, 177.63259887695312, 192.3027801513672, 234.2518310546875, 62.506290435791016, 736.69873046875, 221.68191528320312, 340.29083251953125, -206.9821014404297, 288.5321960449219, 211.67994689941406, 381.2859802246094, 242.04580688476562, -235.8682861328125, 671.211669921875, 119.9059066772461, -170.69808959960938, -76.71550750732422, 14.06796646118164, -94.18500518798828, 107.75596618652344, -108.67303466796875, -187.66505432128906, 244.13262939453125, 603.045654296875, 643.0579833984375, 176.3201141357422, 99.97016906738281, 60.50212860107422, 484.9920654296875, 530.8623046875, 500.49310302734375, -269.5550231933594, -90.21969604492188, 266.7073669433594, 3.945352554321289, 311.41705322265625, 299.71685791015625, 487.65960693359375, 602.933349609375, 512.0297241210938, 469.3939208984375, -19.578346252441406, 271.43231201171875, 281.36029052734375, 282.5430603027344, 660.2268676757812, 334.6210021972656, 69.3008041381836, 860.14208984375, 551.854736328125, 521.931884765625, 316.04180908203125, 235.81776428222656, 272.5916442871094, 472.80938720703125, 89.22402954101562, 123.64508056640625, 535.7083740234375, 402.01568603515625, 209.3010711669922], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000618.npy"}
|
|
{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 209.35650634765625, "std": 298.5389099121094, "min": -436.20257568359375, "p10": -88.8311897277832, "median": 192.6920166015625, "p90": 625.0803527832031, "max": 988.318603515625, "pos_frac": 0.6875, "sample": [-436.20257568359375, -86.02326965332031, 273.2117919921875, 366.17059326171875, 409.00152587890625, -90.03458404541016, 200.01133728027344, -30.906349182128906, 298.90985107421875, 37.470375061035156, 332.663330078125, 411.1180419921875, 177.66343688964844, 849.1829833984375, -85.82225036621094, 206.85394287109375, 630.4791259765625, 187.28237915039062, 637.8285522460938, 558.24755859375, -297.79327392578125, -75.67814636230469, 612.4832153320312, 607.98193359375, -84.60722351074219, -175.97048950195312, -36.05779266357422, 502.26251220703125, 304.89764404296875, -10.26422119140625, -1.1651611328125, 287.09918212890625, -5.588905334472656, -114.30322265625, 498.023193359375, 19.311431884765625, 97.06524658203125, -73.36470031738281, 314.84259033203125, 198.10165405273438, 126.560302734375, 709.6305541992188, 368.34149169921875, 588.7431030273438, 380.3938293457031, 68.81388854980469, -122.57150268554688, 669.305419921875, 535.0311279296875, 216.023681640625, 60.316017150878906, 74.62847900390625, 273.0541687011719, 988.318603515625, -405.8492126464844, -66.9984130859375, 305.8454284667969, -32.545162200927734, 666.8771362304688, 48.66262435913086, 123.18667602539062, 110.09927368164062, -9.646848678588867, 308.2144775390625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000619.npy"}
|
|
{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 203.23731994628906, "std": 260.2491760253906, "min": -246.59478759765625, "p10": -70.31447525024414, "median": 173.5419158935547, "p90": 552.3133361816408, "max": 1031.54150390625, "pos_frac": 0.78125, "sample": [224.7017822265625, -246.59478759765625, 345.11834716796875, 302.2669982910156, 148.36373901367188, 361.6844482421875, 310.74462890625, 278.1468200683594, -39.86737823486328, 566.0453491210938, -70.40206146240234, 71.01996612548828, -50.928001403808594, 518.36376953125, 428.198974609375, 135.4537353515625, 592.478515625, 71.27113342285156, 266.2531433105469, 614.4580688476562, 344.6666259765625, 99.50155639648438, 96.25282287597656, 313.48406982421875, -154.90432739257812, 197.75852966308594, 336.0257568359375, 61.590789794921875, 214.21510314941406, 42.79191589355469, -173.16268920898438, 582.89208984375, 300.0198974609375, 1031.54150390625, 38.609619140625, 231.3439483642578, -66.43769073486328, 227.78843688964844, 206.64080810546875, 268.4283447265625, 98.4958724975586, 262.4400634765625, -53.63132095336914, -43.51393127441406, 833.5621337890625, 86.45025634765625, 520.27197265625, -244.3890838623047, 119.2293930053711, 293.9853210449219, -70.110107421875, -97.34822082519531, 971.6463623046875, 74.66509246826172, 97.18275451660156, 9.176666259765625, 251.53744506835938, 142.04788208007812, -80.21562194824219, 384.61590576171875, 71.58049774169922, 224.83682250976562, -20.477081298828125, 149.32530212402344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000620.npy"}
|
|
{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 286.56024169921875, "std": 266.24285888671875, "min": -408.0282287597656, "p10": -27.14005508422847, "median": 264.55316162109375, "p90": 657.271240234375, "max": 911.2449340820312, "pos_frac": 0.890625, "sample": [490.786376953125, -223.7530517578125, -408.0282287597656, -76.06590270996094, 542.1983642578125, 95.4531021118164, 192.1683349609375, 152.6537628173828, 314.5893859863281, 692.054931640625, 514.8458862304688, 145.5222625732422, 183.37818908691406, 496.1427001953125, 239.35635375976562, 62.46204376220703, 30.421533584594727, 321.5675964355469, 47.004432678222656, 175.37286376953125, 836.759521484375, 368.54425048828125, 716.7911376953125, 209.71202087402344, 490.3140563964844, 911.2449340820312, 32.283241271972656, 422.16949462890625, 651.814453125, -45.567569732666016, 149.74276733398438, 436.3842468261719, 491.2490234375, 159.20291137695312, 323.3119201660156, 692.262939453125, 133.16546630859375, 419.8873291015625, 322.2897033691406, 74.22763061523438, -75.91423034667969, 517.197021484375, -139.26638793945312, 372.08984375, 268.8242492675781, 196.67276000976562, 459.70269775390625, 260.2820739746094, 243.817626953125, 645.4358520507812, -191.7167205810547, 135.69512939453125, 131.83969116210938, 298.6574401855469, 189.1896209716797, 229.73451232910156, 411.1224060058594, 659.60986328125, 15.857479095458984, 271.58734130859375, 859.8212890625, 269.311767578125, 333.10626220703125, 193.2779998779297], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000621.npy"}
|
|
{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 228.80435180664062, "std": 258.947021484375, "min": -290.7543640136719, "p10": -27.297573089599606, "median": 209.22176361083984, "p90": 556.316943359375, "max": 1100.17919921875, "pos_frac": 0.859375, "sample": [273.1888732910156, 305.8945007324219, 187.48068237304688, 310.5918273925781, 267.86041259765625, 341.0722351074219, -215.5122528076172, 161.56776428222656, 880.5847778320312, 203.41893005371094, 135.22323608398438, 30.63996124267578, 121.88865661621094, -76.75898742675781, 66.64627075195312, 375.2415771484375, 52.13920593261719, 215.02459716796875, 159.36422729492188, 153.46644592285156, 500.2052917480469, 27.41748046875, 27.425308227539062, 1100.17919921875, 248.759765625, 44.61651611328125, 263.75640869140625, 377.8514709472656, 333.28936767578125, -290.7543640136719, 232.03582763671875, -24.48797607421875, 681.8405151367188, 123.99520874023438, 124.18511962890625, 711.422607421875, 30.009902954101562, -104.25475311279297, 511.48858642578125, 297.3167419433594, 549.515869140625, 264.38897705078125, -0.6085739135742188, 122.45201873779297, 239.74478149414062, 96.864013671875, 308.5531311035156, -64.17404174804688, 22.944869995117188, 157.2707977294922, 595.45947265625, 419.809326171875, 559.231689453125, 342.9242858886719, 332.16265869140625, 277.2554016113281, -275.3125915527344, -28.501686096191406, 106.38810729980469, 802.5288696289062, 323.35198974609375, 249.67958068847656, 61.657108306884766, 14.571235656738281], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000622.npy"}
|
|
{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 186.13925170898438, "std": 260.9971618652344, "min": -473.35552978515625, "p10": -44.56987953186035, "median": 136.10975646972656, "p90": 569.8760040283206, "max": 999.7001342773438, "pos_frac": 0.8125, "sample": [137.9425048828125, 204.0914306640625, -391.302490234375, 616.4453125, 180.25082397460938, 45.1221809387207, 432.39190673828125, 430.5879821777344, 219.91934204101562, 47.53033447265625, 768.5811157226562, 79.18867492675781, 261.02008056640625, -3.2276763916015625, 256.9272766113281, -42.81938552856445, 613.4410400390625, 94.00970458984375, 55.111328125, 86.36114501953125, 130.88050842285156, 165.9000244140625, 224.04269409179688, 74.15948486328125, -473.35552978515625, 199.0018310546875, 300.7752990722656, -45.320091247558594, 235.2359619140625, 264.8147888183594, -14.183563232421875, 98.13433837890625, 88.31726837158203, 82.4352798461914, -54.303192138671875, 80.00599670410156, 599.4685668945312, 261.53460693359375, 329.41546630859375, 162.02963256835938, -8.273117065429688, 96.15817260742188, 245.93797302246094, 500.8266906738281, -143.31155395507812, 14.42892074584961, 999.7001342773438, 363.765869140625, 74.86097717285156, -134.58151245117188, 790.3617553710938, 366.41876220703125, 18.484939575195312, 94.60999298095703, 134.27700805664062, 680.1722412109375, -292.531494140625, 163.67494201660156, 231.1610107421875, 124.40992736816406, 372.9218444824219, 55.2015380859375, 386.32537841796875, -22.650909423828125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000623.npy"}
|
|
{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 257.11578369140625, "std": 263.3613586425781, "min": -488.3939514160156, "p10": -61.564847946166985, "median": 252.0476531982422, "p90": 598.1448791503907, "max": 757.2966918945312, "pos_frac": 0.828125, "sample": [228.91281127929688, -57.57197952270508, 515.82275390625, 33.23865509033203, 387.08990478515625, 507.976318359375, 417.4590148925781, 752.5982055664062, 622.2337646484375, 33.91638946533203, 54.583404541015625, 50.27423095703125, 206.17025756835938, 479.6932067871094, 243.56710815429688, -46.637786865234375, 462.22796630859375, 582.7908935546875, 620.8656616210938, -488.3939514160156, 198.05796813964844, 491.8468017578125, -14.472251892089844, 168.7480010986328, 262.7561340332031, 219.90402221679688, 483.9036560058594, 568.1520385742188, 380.130615234375, 52.213287353515625, 216.09461975097656, 477.5652770996094, -144.66055297851562, 291.0957946777344, -6.716552734375, 409.59356689453125, 405.0684814453125, 30.654573440551758, 308.5312194824219, 347.401123046875, -111.23921966552734, 77.99674224853516, 260.5281982421875, -63.27607727050781, 531.311767578125, 360.4815673828125, 131.2912139892578, 394.5947570800781, 671.4366455078125, 382.8851013183594, 137.8321075439453, -295.0772705078125, 32.45567321777344, 679.9267578125, 81.31694793701172, 126.15214538574219, -107.43905639648438, 137.4107666015625, -90.76438903808594, 301.1715087890625, 757.2966918945312, 188.00433349609375, 604.7251586914062, 513.7041015625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000624.npy"}
|
|
{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 234.7877197265625, "std": 253.61647033691406, "min": -259.7597961425781, "p10": -37.45314140319823, "median": 194.02318572998047, "p90": 598.8096130371094, "max": 1109.7626953125, "pos_frac": 0.859375, "sample": [-52.484107971191406, -50.867889404296875, 330.5932922363281, 420.8525695800781, 82.60488891601562, 178.72650146484375, 601.8848876953125, 904.886962890625, 123.69508361816406, 389.0537414550781, 82.95297241210938, -259.7597961425781, 61.490692138671875, 190.4896697998047, 591.6339721679688, 346.1888427734375, 28.16731071472168, -98.73210144042969, 1109.7626953125, 352.23956298828125, 761.0137939453125, 168.0785675048828, 98.7322769165039, 106.13145446777344, -121.0713882446289, 345.0070495605469, 681.01171875, 46.933135986328125, 32.00477600097656, 197.55670166015625, 125.3196792602539, 301.72442626953125, 441.17718505859375, -25.921878814697266, 412.3887939453125, 100.88894653320312, 60.877113342285156, 344.52239990234375, 208.41891479492188, 66.2926025390625, 257.5224609375, 215.71798706054688, 563.171630859375, 262.7762451171875, 218.5799102783203, 239.81619262695312, 37.52183532714844, 358.54486083984375, 231.5137176513672, 185.2388458251953, 0.41724205017089844, -5.9015655517578125, 436.3994140625, 204.4055633544922, 49.20863723754883, 44.807945251464844, 158.78321838378906, 162.98147583007812, 615.268798828125, -111.74929809570312, 342.1056823730469, -42.395111083984375, 622.8621215820312, 294.3503723144531], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000625.npy"}
|
|
{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 186.0296630859375, "std": 247.69920349121094, "min": -233.68988037109375, "p10": -124.9413558959961, "median": 131.44493865966797, "p90": 491.81701049804695, "max": 875.3749389648438, "pos_frac": 0.75, "sample": [321.4393005371094, 107.31808471679688, -125.59039306640625, 234.88706970214844, 166.5248260498047, 186.27891540527344, 426.6068115234375, 91.30517578125, 420.87127685546875, -233.68988037109375, 551.0399169921875, 428.8900451660156, 580.2452392578125, 875.3749389648438, -124.9315185546875, 25.477821350097656, 260.0961608886719, -109.9968490600586, 29.366455078125, 31.78863525390625, 128.95574951171875, -0.6380157470703125, 81.59822082519531, 3.971292495727539, -58.03071594238281, 137.4810333251953, 120.83535766601562, -157.9558868408203, 68.59011840820312, 500.1546630859375, -14.931684494018555, 412.9194641113281, -137.62376403808594, -140.98263549804688, 265.0554504394531, 377.6900634765625, 472.36248779296875, 145.94036865234375, 133.9341278076172, 453.0290222167969, 430.27349853515625, -44.887298583984375, 752.9877319335938, 365.09039306640625, 701.070556640625, 123.52100372314453, -59.60680389404297, 181.84774780273438, 92.70744323730469, 368.7833251953125, 18.14520263671875, 619.0995483398438, 361.16070556640625, 438.3219909667969, -4.890533447265625, 114.40361022949219, 188.966552734375, 261.021240234375, -230.85092163085938, -124.94557189941406, 110.63529968261719, 56.366615295410156, 264.1610107421875, -13.140514373779297], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000626.npy"}
|
|
{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 274.2085266113281, "std": 335.41656494140625, "min": -381.3572082519531, "p10": -69.82087402343748, "median": 205.49041748046875, "p90": 728.3934204101564, "max": 1524.8092041015625, "pos_frac": 0.78125, "sample": [264.35791015625, 205.677978515625, 841.0526123046875, 330.0501708984375, 96.2193374633789, -8.114578247070312, 202.63088989257812, 431.4185485839844, 654.9910278320312, 176.65158081054688, 641.4488525390625, -224.83248901367188, 147.061279296875, -74.26870727539062, -79.55364227294922, 647.2161865234375, -17.955223083496094, 248.80389404296875, 540.7386474609375, 514.209716796875, -243.4519805908203, 392.8318786621094, 387.86126708984375, -7.89637565612793, 911.2625732421875, 132.17379760742188, 351.77301025390625, 750.8063354492188, 90.53582763671875, 97.75586700439453, 178.64370727539062, 51.77096176147461, 218.91171264648438, 839.106689453125, 515.34716796875, 456.23260498046875, 539.635986328125, 163.56503295898438, 373.0562744140625, -59.442596435546875, 105.63497924804688, 3.0057201385498047, 1524.8092041015625, 293.5280456542969, 205.3028564453125, -17.19538116455078, 905.6614990234375, -124.56639862060547, 94.4759521484375, -40.986297607421875, 676.0966186523438, 58.538536071777344, 400.39739990234375, -381.3572082519531, 149.24403381347656, 290.5384521484375, 224.38014221191406, 74.07658386230469, -167.47613525390625, 866.2706909179688, 318.18658447265625, -22.822952270507812, 236.50042724609375, 198.81927490234375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000627.npy"}
|
|
{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 265.1599426269531, "std": 266.075439453125, "min": -400.65155029296875, "p10": -31.592395019531246, "median": 190.83072662353516, "p90": 629.8471069335939, "max": 928.88916015625, "pos_frac": 0.828125, "sample": [262.4884338378906, -10.796875, 300.0704650878906, 648.1983642578125, 608.4286499023438, 138.66360473632812, 392.9312744140625, 121.15630340576172, 58.72062683105469, -49.23846435546875, -33.887489318847656, 60.52027130126953, -400.65155029296875, 655.5831909179688, 406.8100280761719, 611.328125, 91.50700378417969, -57.98101043701172, 928.88916015625, 458.34808349609375, 576.1109008789062, 460.60638427734375, 189.60519409179688, 175.4993896484375, 71.51062774658203, -25.350135803222656, 172.8824005126953, 289.58953857421875, 6.751014709472656, -64.68351745605469, 637.7838134765625, 324.59149169921875, 485.77520751953125, 550.4786376953125, 543.4329223632812, 487.08612060546875, 280.10748291015625, 451.8826904296875, 22.773305892944336, 141.52532958984375, -23.510644912719727, 241.9454345703125, 125.23699188232422, 162.008544921875, 532.8812866210938, 404.0074462890625, 553.6802978515625, 171.38119506835938, -26.23717498779297, -149.3743896484375, 182.79412841796875, 96.82398223876953, 549.8432006835938, 660.0237426757812, -142.58343505859375, 67.66814422607422, 653.466064453125, 150.68325805664062, 449.8492126464844, 158.08270263671875, 751.359619140625, 235.44500732421875, 192.05625915527344, 3.658313751220703], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000628.npy"}
|
|
{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 220.31668090820312, "std": 212.06776428222656, "min": -245.51988220214844, "p10": -28.338907623291004, "median": 200.05453491210938, "p90": 485.135726928711, "max": 927.4924926757812, "pos_frac": 0.859375, "sample": [196.50814819335938, 256.462646484375, -61.87464904785156, -33.133338928222656, 409.19561767578125, 103.40684509277344, 148.9538116455078, 299.9858703613281, 383.374755859375, 420.3203125, 316.3343811035156, 181.12030029296875, 159.6248779296875, 102.74453735351562, 137.3721466064453, -35.20185089111328, 187.50515747070312, 123.40240478515625, 4.117044448852539, 425.1097106933594, 25.640579223632812, 575.6862182617188, 233.38333129882812, -205.2958526611328, 40.1302375793457, 231.32443237304688, -60.32667541503906, -56.638465881347656, 426.7875061035156, 287.5783386230469, 106.9322280883789, 268.85577392578125, 927.4924926757812, 675.87939453125, 375.06890869140625, 167.01112365722656, -245.51988220214844, 607.5148315429688, 358.2500915527344, 39.42042541503906, -17.151901245117188, 10.6171875, 192.5622100830078, -14.442825317382812, 286.7977294921875, 502.5058288574219, 199.09580993652344, 145.03871154785156, 284.65008544921875, 13.982519149780273, 370.39227294921875, 201.0132598876953, 219.00616455078125, 396.90399169921875, 110.2757568359375, 491.95196533203125, 53.63800811767578, 469.2311706542969, 497.4225769042969, 331.61309814453125, 344.1339111328125, 207.19979858398438, 247.21002197265625, 52.117034912109375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000629.npy"}
|
|
{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 171.56788635253906, "std": 247.88465881347656, "min": -253.02587890625, "p10": -117.91142120361327, "median": 116.60264205932617, "p90": 499.49632263183594, "max": 1042.3204345703125, "pos_frac": 0.796875, "sample": [548.45458984375, -172.08306884765625, -6.775793075561523, 172.09799194335938, -133.7303466796875, 441.1111145019531, 181.07464599609375, 84.24884033203125, 1042.3204345703125, 48.61663055419922, 275.3285217285156, 48.01957702636719, 169.86740112304688, 433.4482116699219, 500.2397766113281, -253.02587890625, 35.117523193359375, 61.759429931640625, 132.88958740234375, 187.09341430664062, 110.16690826416016, -27.996532440185547, 97.03935241699219, 642.7158203125, -159.3496856689453, 764.7858276367188, 66.28398132324219, 432.9732666015625, 96.21485137939453, 40.47929382324219, 28.459325790405273, 153.6080322265625, 256.1346435546875, -252.92440795898438, 308.21905517578125, -21.812334060668945, -114.71842956542969, 348.9847412109375, 108.29983520507812, 270.2794189453125, 64.90780639648438, 43.27607345581055, 78.3736801147461, -119.27984619140625, 140.623046875, 228.94873046875, 128.58758544921875, 163.68328857421875, 385.06689453125, -22.352378845214844, 115.09718322753906, 497.7615966796875, 541.1889038085938, 145.16525268554688, -81.33674621582031, 155.6853790283203, 118.10810089111328, 92.15882873535156, 397.4304504394531, -147.218994140625, 722.894775390625, 62.25110626220703, 325.08642578125, 0.3221111297607422], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000630.npy"}
|
|
{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 203.8429412841797, "std": 280.50927734375, "min": -579.2224731445312, "p10": -149.3619720458984, "median": 200.37132263183594, "p90": 611.9894775390627, "max": 807.571044921875, "pos_frac": 0.796875, "sample": [704.05859375, 373.70904541015625, -99.51309204101562, 233.45762634277344, 104.68973541259766, 339.3436279296875, 59.382659912109375, 55.98577117919922, -111.38250732421875, -211.3828582763672, 323.3397521972656, 807.571044921875, -269.9188537597656, 522.4572143554688, -63.92646408081055, -34.75926208496094, 95.03960418701172, 559.2080078125, 15.588920593261719, 67.92977142333984, 64.52887725830078, 6.148439407348633, 280.17041015625, -259.2859191894531, 297.2066955566406, 230.52890014648438, 355.77313232421875, 26.010269165039062, 419.05364990234375, 212.40805053710938, -579.2224731445312, 355.8922424316406, 647.2333984375, 164.8153076171875, 368.50457763671875, 266.31829833984375, 157.27291870117188, -100.44355773925781, 155.8614501953125, 417.2177734375, 268.3365783691406, -32.143287658691406, 759.8514404296875, 459.2535095214844, 361.4907531738281, -193.02371215820312, 651.8453369140625, 199.2540283203125, 634.610107421875, 419.4805908203125, 108.00037384033203, 111.75483703613281, 45.221736907958984, 38.05748748779297, -207.60455322265625, 1.885223388671875, 377.72930908203125, 325.7738952636719, 757.5512084960938, -165.63888549804688, 201.48861694335938, 435.2492370605469, 166.98294067382812, 363.67071533203125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000631.npy"}
|
|
{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 203.0538330078125, "std": 275.9039001464844, "min": -510.4873352050781, "p10": -97.63955917358396, "median": 205.3672866821289, "p90": 592.4291809082032, "max": 826.4226684570312, "pos_frac": 0.8125, "sample": [196.2535400390625, -419.9652099609375, 149.56442260742188, 195.3077392578125, 440.095703125, -510.4873352050781, 589.1128540039062, 643.0274658203125, 602.1688842773438, 407.09478759765625, 357.05401611328125, 201.48609924316406, 277.565673828125, 584.060302734375, 249.0030517578125, 136.02777099609375, 455.8730163574219, 304.848388671875, 43.214412689208984, -45.839378356933594, 509.65423583984375, 274.4574890136719, 374.3534851074219, 341.80987548828125, -221.0520782470703, 649.1031494140625, 356.9259033203125, -272.6495666503906, 255.38235473632812, -6.437494277954102, -407.85980224609375, 47.573326110839844, 610.262939453125, 173.8954315185547, 185.93734741210938, 254.186767578125, 593.8504638671875, 307.4095153808594, 209.24847412109375, -68.0602798461914, 360.6424255371094, 43.35625457763672, -106.39490509033203, 4.640045166015625, 358.84259033203125, 265.73101806640625, 376.1149597167969, 16.42247772216797, 656.5460815429688, 826.4226684570312, 235.1895751953125, -77.21041870117188, -267.80230712890625, 36.99408721923828, 101.46110534667969, 68.2283706665039, 50.87427520751953, 319.868408203125, -8.536933898925781, 181.98876953125, 61.26008605957031, 51.34147644042969, 55.39191436767578, 360.6160583496094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000632.npy"}
|
|
{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 164.26397705078125, "std": 253.69015502929688, "min": -378.1680908203125, "p10": -154.91684112548822, "median": 168.13714599609375, "p90": 417.56456298828124, "max": 907.9172973632812, "pos_frac": 0.75, "sample": [-99.81423950195312, 369.84820556640625, 293.33551025390625, -91.97502899169922, 249.00787353515625, 343.290771484375, -178.53224182128906, -85.80648803710938, 715.2877197265625, 156.11886596679688, 281.7783508300781, 179.93646240234375, 215.1730499267578, -286.5077209472656, -194.34893798828125, 77.70450592041016, 186.95748901367188, 490.8838195800781, 186.51699829101562, 39.403564453125, 37.26451873779297, 565.9188232421875, 266.16387939453125, -22.225662231445312, 405.10186767578125, 71.25698852539062, 51.618133544921875, 137.27017211914062, -378.1680908203125, 117.23397064208984, 388.93463134765625, 209.96670532226562, 220.7007293701172, 218.30575561523438, 87.40489196777344, -49.19527053833008, -92.34237670898438, -348.964599609375, -288.4356689453125, 150.77462768554688, 907.9172973632812, 334.7993469238281, -55.55596923828125, -293.9410705566406, 418.00225830078125, 308.0274353027344, 282.6697998046875, 581.0548095703125, 393.3334045410156, 156.33782958984375, 52.743621826171875, 151.9644775390625, -6.897321701049805, 399.3076171875, 42.90238952636719, -19.023300170898438, 218.2523651123047, 654.4068603515625, 361.3514099121094, 81.20779418945312, 416.54327392578125, 78.940185546875, 214.79135131835938, 236.9165496826172], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000633.npy"}
|
|
{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 186.1195068359375, "std": 251.7107696533203, "min": -332.34814453125, "p10": -90.2194854736328, "median": 136.3503646850586, "p90": 544.343603515625, "max": 744.0096435546875, "pos_frac": 0.734375, "sample": [-11.045578002929688, 420.88372802734375, 44.21216583251953, 717.1988525390625, 55.96833801269531, -9.784547805786133, 18.07078742980957, 278.1493225097656, 234.25900268554688, 466.4637451171875, 57.31689453125, 168.80078125, 8.539606094360352, 143.9241943359375, 541.8374633789062, 223.41519165039062, 91.80743408203125, -26.232666015625, 87.87313842773438, 197.89956665039062, 622.7056884765625, -201.7671356201172, -149.66358947753906, 79.70755004882812, -150.17391967773438, 171.99842834472656, 1.17340087890625, 545.4176635742188, 341.83966064453125, 99.75675964355469, 744.0096435546875, 158.64947509765625, 138.00750732421875, -332.34814453125, 732.729736328125, 389.577392578125, 691.336669921875, -152.72323608398438, 78.679931640625, 133.12509155273438, 293.5907287597656, -81.46482849121094, -41.32996368408203, -49.396060943603516, 51.26289367675781, -114.96228790283203, 197.8992156982422, -5.2796173095703125, 558.449951171875, 462.441162109375, -10.790433883666992, 447.5313720703125, 352.7745056152344, 478.47607421875, 480.5417175292969, 134.69322204589844, 219.42300415039062, 480.5054016113281, -57.828330993652344, 174.236083984375, -41.82319641113281, -93.97148132324219, 105.51236724853516, 319.5606994628906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000634.npy"}
|
|
{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 188.1464080810547, "std": 231.19342041015625, "min": -229.848388671875, "p10": -35.25636749267578, "median": 135.38947296142578, "p90": 561.749725341797, "max": 827.0810546875, "pos_frac": 0.78125, "sample": [241.45339965820312, 261.559814453125, 827.0810546875, -29.4422607421875, 220.99072265625, -49.653160095214844, 76.33880615234375, 27.512584686279297, -1.02777099609375, 141.915283203125, 117.17823028564453, 578.0606689453125, 46.08558654785156, 135.97691345214844, -75.3914794921875, 243.84738159179688, 134.80203247070312, -27.348724365234375, 626.4513549804688, 43.37504577636719, 236.42237854003906, 94.95750427246094, 523.6908569335938, 520.4862060546875, 227.21812438964844, 5.557838439941406, 7.8823699951171875, 303.8985290527344, 206.39488220214844, 737.1091918945312, -93.18689727783203, 323.74774169921875, 52.89024353027344, 603.4115600585938, 336.60107421875, 129.01974487304688, 116.94512176513672, -3.847766876220703, 53.494361877441406, 346.14715576171875, 66.03559875488281, 358.72406005859375, 623.5349731445312, 224.26211547851562, -81.86981964111328, 54.919044494628906, 320.9605712890625, -26.535911560058594, -229.848388671875, 281.142578125, -31.52752685546875, -117.51734924316406, 177.74368286132812, 306.75994873046875, 379.1608581542969, 193.0464324951172, 169.29144287109375, 98.30093383789062, 7.4307861328125, 805.4163208007812, -36.19123077392578, -33.07501983642578, 124.59274291992188, 138.0075225830078], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000635.npy"}
|
|
{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 221.78475952148438, "std": 231.627197265625, "min": -189.03956604003906, "p10": -74.23186950683593, "median": 194.50141143798828, "p90": 439.4489227294922, "max": 1135.9569091796875, "pos_frac": 0.828125, "sample": [45.820884704589844, -43.38615417480469, 74.73947143554688, 26.790802001953125, 86.76205444335938, -67.26181030273438, 380.86016845703125, 156.04867553710938, 360.0171203613281, 55.644126892089844, 329.6236572265625, 98.42105865478516, 285.1205139160156, 350.064453125, 113.0527114868164, 182.258544921875, -92.77435302734375, 702.1973266601562, 509.02777099609375, 400.809326171875, 339.77923583984375, 370.112548828125, 149.23431396484375, -77.08238220214844, 384.9092712402344, 199.73202514648438, 402.70208740234375, 319.35693359375, 189.2707977294922, 1135.9569091796875, -67.58067321777344, -143.1369171142578, 432.226806640625, 356.18023681640625, -140.38125610351562, 672.195068359375, 419.6955261230469, 248.83193969726562, 251.65353393554688, -130.8380126953125, 470.18609619140625, 162.3513946533203, 246.76431274414062, 372.49713134765625, 172.97305297851562, 281.55975341796875, 434.7276611328125, 337.4405212402344, 71.61626434326172, -189.03956604003906, 158.84568786621094, -42.06048583984375, -143.70977783203125, 155.7677459716797, 217.96328735351562, 187.5972442626953, 365.19378662109375, 512.22900390625, 115.7870864868164, 249.94627380371094, 7.1127777099609375, 151.68836975097656, 188.6607666015625, 441.4723205566406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000636.npy"}
|
|
{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 239.5159912109375, "std": 253.50344848632812, "min": -222.67715454101562, "p10": -40.259692382812496, "median": 197.39249420166016, "p90": 565.7118530273438, "max": 971.136962890625, "pos_frac": 0.828125, "sample": [213.93951416015625, 64.36128997802734, -19.567352294921875, 193.72637939453125, 60.24617385864258, 471.053955078125, 394.51055908203125, -222.67715454101562, 512.5031127929688, 541.327392578125, 444.56964111328125, -146.94766235351562, 106.2088623046875, 198.21185302734375, -166.14549255371094, 566.0625, 196.57313537597656, 350.1095886230469, 564.8936767578125, 298.8017272949219, 561.3079833984375, 37.96947479248047, 70.76620483398438, 971.136962890625, 106.04379272460938, 342.4110107421875, 16.50546646118164, 30.41225814819336, 455.8529968261719, 178.23175048828125, 296.8537902832031, -155.55096435546875, 58.185096740722656, 275.76806640625, 337.2147216796875, -51.132598876953125, 177.69886779785156, 268.40692138671875, 340.0328674316406, 128.68521118164062, 191.3326416015625, 498.42828369140625, 148.92669677734375, 285.7596740722656, 529.8826904296875, 318.85125732421875, 577.7210693359375, -42.533226013183594, 596.50341796875, -31.210304260253906, 724.2183837890625, -131.87953186035156, 204.14837646484375, 529.4835815429688, 260.6171569824219, 9.566347122192383, 110.38648986816406, 660.3120727539062, -20.492704391479492, 696.8771362304688, 12.895219802856445, 80.47410583496094, 85.1478271484375, -34.95478057861328], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000637.npy"}
|
|
{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 227.61680603027344, "std": 215.01885986328125, "min": -181.33477783203125, "p10": -20.343343544006345, "median": 195.2665252685547, "p90": 521.6813842773438, "max": 819.257080078125, "pos_frac": 0.875, "sample": [647.1600341796875, -181.33477783203125, -156.08267211914062, 179.94088745117188, 283.1649169921875, 47.42999267578125, 134.97787475585938, 89.80624389648438, 393.6571350097656, 112.66709899902344, 279.10198974609375, 819.257080078125, 76.66392517089844, 578.9684448242188, 244.8533172607422, 56.9629020690918, 287.33331298828125, 189.23928833007812, 358.57501220703125, 155.79737854003906, 124.8302230834961, 41.754417419433594, 97.89446258544922, 212.90878295898438, 75.25257873535156, 469.5617980957031, 223.56655883789062, 157.14881896972656, 609.2169189453125, 348.3128356933594, 424.70892333984375, 373.00933837890625, 268.1512451171875, 180.60238647460938, 327.1859130859375, 210.85580444335938, 26.482101440429688, -30.044063568115234, 54.015411376953125, -21.444561004638672, -149.0203399658203, 201.29376220703125, 293.9111022949219, 532.4669189453125, 61.62812042236328, -31.2357120513916, 617.5480346679688, 183.79383850097656, 274.1123046875, 287.87811279296875, 71.93272399902344, -17.773836135864258, 496.51513671875, 181.2715301513672, 346.1025085449219, 49.6181640625, 73.72635650634766, 95.9161148071289, 435.10101318359375, 728.1199340820312, -62.274864196777344, 208.43136596679688, 471.17230224609375, 445.13177490234375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000638.npy"}
|
|
{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 165.6368408203125, "std": 243.4095001220703, "min": -394.4620361328125, "p10": -142.2639389038086, "median": 159.0180892944336, "p90": 514.6058471679688, "max": 751.8912963867188, "pos_frac": 0.734375, "sample": [532.3295288085938, -74.73304748535156, -78.90476989746094, 546.4711303710938, 417.8572692871094, 133.78965759277344, 313.25726318359375, 205.254638671875, 15.621864318847656, 262.2759704589844, 404.2469177246094, -86.75326538085938, 319.64520263671875, 100.97366333007812, -158.98646545410156, 30.846527099609375, -141.81922912597656, -192.5789794921875, -34.312652587890625, 55.171409606933594, 537.8184814453125, -117.17855072021484, 384.7987060546875, 294.86578369140625, 189.8273468017578, -142.45452880859375, -236.45448303222656, 189.8422393798828, 49.426734924316406, 306.0517578125, 489.1365966796875, 74.8283462524414, 48.46117401123047, -87.49479675292969, 525.521240234375, 296.6405944824219, 157.06394958496094, 550.670166015625, 194.7587432861328, 187.24139404296875, 112.71224212646484, 367.4509582519531, 444.72210693359375, 310.76715087890625, 751.8912963867188, -292.005615234375, 118.71196746826172, 269.6417541503906, -197.581298828125, 131.62603759765625, 133.11001586914062, -58.44606018066406, 300.2899169921875, 69.1777114868164, -394.4620361328125, -120.98709869384766, -2.61810302734375, 226.5469512939453, 211.68434143066406, 435.5807189941406, 139.30718994140625, 563.0873413085938, 456.554443359375, 160.97222900390625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000639.npy"}
|
|
{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 278.02667236328125, "std": 281.4782409667969, "min": -174.88809204101562, "p10": -67.59055633544921, "median": 275.6049346923828, "p90": 621.1993164062503, "max": 1224.927734375, "pos_frac": 0.875, "sample": [673.8739624023438, 314.5404968261719, 2.363187789916992, 102.93479919433594, 179.6851806640625, 53.811614990234375, 411.41839599609375, 668.2587890625, 360.7242126464844, 53.351585388183594, 487.00701904296875, 656.8257446289062, 472.6722412109375, 205.15591430664062, 54.20869827270508, 148.61386108398438, 394.48046875, -73.4244384765625, -53.97816467285156, 291.73370361328125, -96.51043701171875, 403.95123291015625, 241.19119262695312, 145.90635681152344, 57.80685043334961, -171.6835174560547, 61.718475341796875, 353.63787841796875, 1224.927734375, 501.9697265625, 380.3369140625, 347.8919372558594, -135.95809936523438, 419.35321044921875, 921.3043212890625, 69.16663360595703, -174.88809204101562, 281.25848388671875, -79.406982421875, 296.5234375, 104.87321472167969, 317.58740234375, 87.13612365722656, 538.0709838867188, 344.4539489746094, 150.10360717773438, 292.48736572265625, 448.2298583984375, -170.64767456054688, 527.03076171875, 817.6370849609375, 294.1477355957031, 7.086212158203125, 437.1403503417969, 1.1820220947265625, 269.9513854980469, 245.6955108642578, 231.1036376953125, 140.60853576660156, 487.5492248535156, 494.0447692871094, 966.5103759765625, 251.1698760986328, 57.80008316040039], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000640.npy"}
|
|
{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 201.86199951171875, "std": 270.418212890625, "min": -365.6437072753906, "p10": -122.38363647460938, "median": 193.27106475830078, "p90": 526.3090454101563, "max": 1369.4395751953125, "pos_frac": 0.796875, "sample": [100.6003189086914, 180.3283233642578, 332.9795837402344, 633.1854858398438, -123.19235229492188, 108.35873413085938, 331.25323486328125, 507.16448974609375, 189.45062255859375, 2.9373931884765625, -113.47273254394531, 207.71719360351562, -41.60728454589844, -147.88424682617188, 1369.4395751953125, -120.9427490234375, -99.81436920166016, -186.79638671875, 17.651748657226562, 228.3138427734375, 284.5853576660156, 188.17401123046875, 113.0318603515625, 286.69085693359375, 231.41305541992188, 198.20632934570312, -365.6437072753906, 147.9933624267578, 229.3524169921875, 163.2345733642578, 193.94273376464844, 216.849365234375, 192.59939575195312, -54.540008544921875, 118.66571807861328, 179.42822265625, 638.752685546875, 131.06031799316406, 223.2030487060547, 272.8358154296875, 230.77334594726562, -79.23898315429688, 19.96245765686035, -316.8739929199219, 168.25482177734375, 157.47166442871094, -276.1629638671875, 212.3977813720703, 447.39837646484375, 534.5138549804688, 631.3306274414062, 538.0867309570312, -123.00115966796875, 320.6674499511719, 182.74407958984375, 410.1895446777344, 363.49383544921875, 341.00335693359375, 235.50367736816406, 178.8565673828125, 349.90606689453125, 399.41058349609375, 469.9831848144531, 556.990234375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000641.npy"}
|
|
{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 235.47906494140625, "std": 321.9939880371094, "min": -334.7275085449219, "p10": -99.21886901855468, "median": 189.12310028076172, "p90": 616.1400695800783, "max": 1553.72216796875, "pos_frac": 0.78125, "sample": [177.9252166748047, -100.06318664550781, 349.8071594238281, 418.427734375, 206.60304260253906, -334.7275085449219, 564.9955444335938, 166.27407836914062, -53.782569885253906, 167.85691833496094, 14.402397155761719, 49.56842041015625, 589.6470947265625, -132.37713623046875, 326.92327880859375, 755.5625, 186.48069763183594, 437.46624755859375, -283.5279541015625, 5.590389251708984, -226.11215209960938, -21.55615234375, 73.43795013427734, 61.17201232910156, 159.77850341796875, 238.7420196533203, 107.2712631225586, 205.00128173828125, 82.22502136230469, 292.9233093261719, 408.9454040527344, 206.59085083007812, 460.6788024902344, 859.1781005859375, -85.43533325195312, 184.70065307617188, 175.374755859375, 469.2449035644531, -252.25213623046875, 249.63143920898438, 481.0452880859375, 211.5824432373047, 260.8545227050781, 576.1005249023438, -12.097225189208984, 823.7908325195312, 418.13427734375, 627.4942016601562, -128.6608428955078, 191.7655029296875, 1553.72216796875, 260.3298034667969, -26.372940063476562, 91.11811828613281, -97.24879455566406, -84.06455993652344, 198.98899841308594, 873.935791015625, 112.63504028320312, 47.780113220214844, 305.97802734375, 645.9578247070312, 49.37835693359375, 525.9203491210938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000642.npy"}
|
|
{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 216.9004364013672, "std": 238.89971923828125, "min": -314.54888916015625, "p10": -77.69179229736328, "median": 228.38202667236328, "p90": 531.6743103027344, "max": 772.53662109375, "pos_frac": 0.828125, "sample": [184.08175659179688, 105.73902893066406, 124.7955551147461, 533.259765625, 430.8516845703125, 445.7777099609375, 363.7890625, -78.73821258544922, 51.26057434082031, 246.78599548339844, 12.827472686767578, 26.525009155273438, 203.74887084960938, 64.19467163085938, -94.07850646972656, 463.4316101074219, 562.7155151367188, 527.9749145507812, 284.2419738769531, 325.8955078125, 162.53773498535156, 38.8684196472168, -3.888254165649414, 112.18799591064453, 134.85865783691406, 156.6932373046875, -314.54888916015625, 252.64138793945312, 339.8758239746094, 355.49249267578125, -296.67340087890625, -2.5927600860595703, -86.1732177734375, 420.890625, 584.41064453125, 633.263427734375, 254.698486328125, -71.99493408203125, 266.4039306640625, 431.4797668457031, 38.3679084777832, 742.375732421875, 667.1892700195312, 350.8496398925781, 376.0677490234375, 365.8459777832031, 3.5634307861328125, 261.78521728515625, 772.53662109375, 30.166362762451172, 434.2801818847656, 351.28765869140625, 234.0002899169922, -140.4482421875, 392.9371643066406, 37.038658142089844, 25.954208374023438, -75.2501449584961, 151.22413635253906, 222.76376342773438, 70.23501586914062, 270.307861328125, -124.19499969482422, 269.2335205078125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000643.npy"}
|
|
{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 191.04055786132812, "std": 292.2703857421875, "min": -743.8994140625, "p10": -107.82975311279294, "median": 142.2873992919922, "p90": 611.5345275878906, "max": 978.916748046875, "pos_frac": 0.78125, "sample": [805.4915771484375, -79.99919891357422, 580.3683471679688, -743.8994140625, 138.63291931152344, 269.43865966796875, -247.18893432617188, -2.6522674560546875, 146.218505859375, 115.37738037109375, 37.09279251098633, 160.2713165283203, -41.61376190185547, 413.218017578125, 25.507949829101562, 612.3592529296875, 96.95850372314453, 146.93431091308594, 204.71701049804688, -174.40431213378906, 140.604736328125, 609.6101684570312, 93.4802474975586, -119.75713348388672, 438.2265625, 453.4230041503906, -4.399662017822266, 70.93547821044922, 220.37347412109375, 584.3858032226562, 20.784067153930664, -181.45114135742188, 495.1517028808594, 62.68373107910156, 145.1508026123047, 22.75493812561035, 978.916748046875, 84.31497192382812, 145.5317840576172, 222.912353515625, -62.755653381347656, -41.491432189941406, 389.1519470214844, 19.460647583007812, 402.9120178222656, 248.70748901367188, 8.369001388549805, -141.127197265625, 100.84446716308594, 659.4356689453125, 660.1150512695312, 426.7948303222656, -60.91774368286133, -252.92625427246094, 425.05584716796875, 636.9489135742188, 267.997802734375, 303.75445556640625, 32.34716796875, 143.97006225585938, 139.76425170898438, 690.450927734375, 104.11981201171875, 179.15286254882812], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000644.npy"}
|
|
{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 155.6749267578125, "std": 281.9188232421875, "min": -358.77191162109375, "p10": -151.38954925537107, "median": 95.23555374145508, "p90": 570.6736572265626, "max": 992.7620239257812, "pos_frac": 0.703125, "sample": [46.85426330566406, 453.54449462890625, -162.02859497070312, 581.5299682617188, -267.46759033203125, -21.950363159179688, -271.265869140625, 36.22698974609375, 263.4684753417969, 15.533557891845703, 0.055103302001953125, 22.8492431640625, 58.55486297607422, -6.67723274230957, -41.573429107666016, 99.25252532958984, 258.18048095703125, -0.4642009735107422, 654.0308837890625, 199.87411499023438, 300.3202819824219, -106.85187530517578, 139.01690673828125, 110.65409851074219, 106.24696350097656, 378.5513610839844, 577.5535888671875, 118.41609954833984, -252.99134826660156, -358.77191162109375, 83.94532012939453, 246.91400146484375, -116.99114990234375, 476.0951232910156, 622.421142578125, -15.722015380859375, 449.70098876953125, 505.2139587402344, -268.92193603515625, -22.069602966308594, 992.7620239257812, 57.047088623046875, 54.534820556640625, 48.81612014770508, 91.21858215332031, 394.9049072265625, 242.0824737548828, 222.49684143066406, 162.19064331054688, 322.72064208984375, 202.21807861328125, -51.57603454589844, 622.3035888671875, 43.27082061767578, 554.6204833984375, 908.2451782226562, 106.22183990478516, 317.03082275390625, -279.07818603515625, 14.919456481933594, 262.00787353515625, -3.2478694915771484, -126.56510925292969, -87.20712280273438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000645.npy"}
|
|
{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 194.4962158203125, "std": 219.5928955078125, "min": -410.946533203125, "p10": -52.78867797851559, "median": 173.50955200195312, "p90": 537.8529113769533, "max": 650.9850463867188, "pos_frac": 0.828125, "sample": [620.3505249023438, 496.31121826171875, 205.24749755859375, 376.43212890625, 335.75897216796875, 170.8042755126953, 634.4165649414062, 362.17572021484375, 627.8751220703125, 15.621414184570312, 307.1325378417969, 245.3055877685547, -410.946533203125, 175.62228393554688, 247.64041137695312, 126.59712219238281, 40.80025100708008, 52.71332550048828, 393.2267150878906, 63.744789123535156, 295.52105712890625, 81.58722686767578, -9.882698059082031, 196.53231811523438, 91.79428100585938, 202.86581420898438, 322.78741455078125, 155.83775329589844, -87.5787353515625, 343.42529296875, 155.89959716796875, 240.31076049804688, 20.522186279296875, 339.9743957519531, -68.77119445800781, 236.01417541503906, 39.063720703125, 489.1257019042969, 119.65867614746094, -225.02220153808594, 221.01426696777344, 650.9850463867188, 112.92829895019531, -82.88737487792969, -85.92555236816406, -15.496139526367188, 179.6846466064453, 171.39682006835938, -1.1343536376953125, 592.472900390625, 555.656494140625, 108.71662139892578, 197.99134826660156, 9.982711791992188, 446.3045349121094, -6.0261688232421875, -93.69264221191406, 90.65106964111328, 197.3870391845703, 157.5328369140625, 61.79779815673828, 631.3824462890625, 58.808555603027344, 261.7309875488281], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000646.npy"}
|
|
{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 202.34747314453125, "std": 244.05262756347656, "min": -389.975830078125, "p10": -119.22219543457028, "median": 199.406494140625, "p90": 511.64754638671883, "max": 807.11474609375, "pos_frac": 0.796875, "sample": [333.2333984375, 444.8432922363281, 225.2169647216797, 182.0182342529297, 171.39108276367188, 461.3279724121094, 374.72625732421875, 342.6642150878906, 368.5145263671875, 140.73748779296875, 633.9295654296875, 129.98916625976562, 32.914581298828125, 650.791748046875, 460.21514892578125, 215.13037109375, 29.83026123046875, 601.861083984375, 395.6225280761719, 14.948945999145508, 136.10018920898438, -57.39581298828125, -47.19578552246094, -84.85330200195312, 362.0869140625, 314.74249267578125, -48.68858337402344, 807.11474609375, -163.0392608642578, 570.9017944335938, 47.51592254638672, -13.233894348144531, 88.33523559570312, 363.7781982421875, 498.92791748046875, 82.00782775878906, 231.48719787597656, 20.430343627929688, -267.19744873046875, -197.26820373535156, 339.915283203125, -8.151557922363281, 576.0490112304688, -133.95172119140625, -182.20042419433594, 86.6204605102539, 326.3232116699219, 183.6826171875, 88.43521881103516, 388.3696594238281, 123.02931213378906, 102.80342864990234, 412.89727783203125, 239.56793212890625, 2.4202728271484375, 306.61236572265625, -389.975830078125, 253.27578735351562, -139.726806640625, 287.21942138671875, 312.9125671386719, 517.0988159179688, 113.99119567871094, 288.5565490722656], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000647.npy"}
|
|
{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 303.04656982421875, "std": 349.8512268066406, "min": -347.2199401855469, "p10": -94.98651504516599, "median": 285.9599151611328, "p90": 790.9949035644532, "max": 1449.9512939453125, "pos_frac": 0.8125, "sample": [228.61984252929688, 55.00901794433594, 65.69219207763672, 305.1379089355469, 177.6473388671875, 428.5711975097656, 756.0908203125, 1113.92724609375, 267.14117431640625, 904.70751953125, 63.12921142578125, -109.22066497802734, 154.46463012695312, 569.996826171875, 154.90985107421875, -222.0022735595703, 488.4578857421875, 284.31658935546875, 145.2058563232422, -251.3126220703125, -23.170515060424805, 355.6811828613281, 28.31072425842285, 483.095458984375, -111.02708435058594, 702.0256958007812, 374.9886474609375, 159.49801635742188, 61.33750915527344, 42.23615264892578, 561.9814453125, -13.027534484863281, -49.920860290527344, 1449.9512939453125, 658.0888671875, 287.6032409667969, -138.91481018066406, 215.58409118652344, 82.50634765625, -197.28097534179688, 59.74493408203125, 294.77630615234375, 960.5819702148438, 390.2025146484375, 98.65846252441406, 457.3232727050781, 805.9537963867188, -61.77349853515625, 311.79052734375, 292.3116149902344, 504.56781005859375, 304.0919494628906, -23.73065185546875, 321.3934631347656, 591.355712890625, 78.85994720458984, 394.5947570800781, 541.2445068359375, 203.8782958984375, 897.8194580078125, 523.9166259765625, 351.4387512207031, -347.2199401855469, 933.1625366210938], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000648.npy"}
|
|
{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 173.563720703125, "std": 271.55169677734375, "min": -292.35418701171875, "p10": -105.07905197143553, "median": 90.4122543334961, "p90": 450.7567565917969, "max": 1203.9141845703125, "pos_frac": 0.671875, "sample": [-13.576793670654297, -4.888877868652344, 213.09378051757812, -53.829986572265625, 414.1908264160156, -42.75050735473633, 80.936279296875, 208.92666625976562, 403.5775451660156, 218.09307861328125, 412.01751708984375, -86.91989135742188, -16.762855529785156, 8.314804077148438, -112.8615493774414, -84.98471069335938, 8.694168090820312, 404.3873596191406, -202.390869140625, 297.1224060058594, 186.97634887695312, 71.51276397705078, -268.7586364746094, -40.66059875488281, 283.89697265625, -189.69712829589844, 418.75396728515625, 331.13909912109375, -20.790081024169922, 329.4010009765625, 337.7516174316406, -115.70418548583984, 31.31634521484375, -28.687461853027344, 637.2420654296875, 218.8494873046875, 454.2171936035156, 241.10012817382812, 416.7217712402344, 70.585205078125, 711.6693115234375, 61.045433044433594, 22.127887725830078, 621.1444091796875, 361.02618408203125, 210.03567504882812, 442.6824035644531, 1203.9141845703125, 246.86328125, 121.78557586669922, 29.74782371520996, 99.88822937011719, 14.917526245117188, 572.9363403320312, -39.759185791015625, 377.2909240722656, 664.477783203125, -193.75100708007812, -5.390979766845703, 57.015769958496094, -292.35418701171875, -15.17724609375, 424.3469543457031, -3.9589004516601562], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000649.npy"}
|
|
{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 241.52120971679688, "std": 290.724609375, "min": -341.75518798828125, "p10": -179.6679702758789, "median": 237.13770294189453, "p90": 593.6239074707033, "max": 933.3805541992188, "pos_frac": 0.8125, "sample": [-73.44396209716797, -237.68963623046875, 474.0811767578125, 513.5436401367188, 247.010986328125, 458.8929138183594, 279.60009765625, -242.1095428466797, 785.6015625, 254.96531677246094, 661.2244262695312, -187.07130432128906, 762.564697265625, 403.7073974609375, 549.9852905273438, 462.5712585449219, 227.26441955566406, 643.4293823242188, 299.187744140625, 933.3805541992188, 429.69403076171875, 93.54911041259766, 377.00299072265625, 301.7382507324219, 159.6230926513672, 94.77903747558594, 138.2774658203125, -39.127586364746094, 43.863250732421875, 307.0341491699219, 499.6259765625, 104.66999816894531, -162.39352416992188, -268.92034912109375, 30.667978286743164, 612.326171875, 13.145954132080078, 844.6942138671875, -39.1220817565918, 160.96170043945312, 373.73516845703125, 518.764404296875, -245.07537841796875, 436.27679443359375, 4.782798767089844, 357.2295227050781, 158.87367248535156, 404.8857116699219, 161.91650390625, 527.9558715820312, 305.7038879394531, 162.8629150390625, 410.982177734375, 219.09527587890625, 276.1386413574219, 154.0111083984375, 163.21705627441406, -341.75518798828125, -295.0094909667969, 183.35537719726562, 547.663818359375, 65.78321075439453, -36.04495620727539, 23.22156524658203], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000650.npy"}
|
|
{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 264.6536560058594, "std": 237.98574829101562, "min": -179.34072875976562, "p10": -9.023788452148423, "median": 248.0232162475586, "p90": 626.4717102050781, "max": 696.6707763671875, "pos_frac": 0.890625, "sample": [479.93450927734375, 382.24853515625, 620.5592651367188, 629.005615234375, -109.61012268066406, 112.99664306640625, 212.92990112304688, 501.1535949707031, 158.4171142578125, 104.11732482910156, 507.6276550292969, 585.8159790039062, 189.5654754638672, 104.02576446533203, 119.5762939453125, 259.98712158203125, 47.88095474243164, 107.33555603027344, -65.38626098632812, 244.80186462402344, 696.6707763671875, -108.10786437988281, 639.4645385742188, 256.9764099121094, 425.52178955078125, 47.44788360595703, -14.761014938354492, -179.34072875976562, 104.1412353515625, 316.5434265136719, 138.90879821777344, 14.377754211425781, 354.68505859375, 201.1532745361328, 671.0037841796875, 385.722412109375, 393.19769287109375, 159.98751831054688, 339.3518371582031, 4.363073348999023, 240.56777954101562, 484.4635314941406, -114.70933532714844, 251.24456787109375, 77.26214599609375, 392.7449645996094, 9.762451171875, 673.0466918945312, 5.578655242919922, 254.76368713378906, 113.18284606933594, 179.03378295898438, 696.3460693359375, 616.2547607421875, 265.605224609375, -110.5750732421875, 261.1418151855469, 40.27104568481445, 605.88623046875, 269.46490478515625, 460.499267578125, 425.7102355957031, 668.67822265625, 131.31861877441406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000651.npy"}
|
|
{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 228.78182983398438, "std": 272.0896301269531, "min": -344.958740234375, "p10": -42.43675956726074, "median": 162.01258850097656, "p90": 576.7321350097657, "max": 999.8045654296875, "pos_frac": 0.796875, "sample": [260.4031982421875, 171.2171630859375, 553.7996826171875, -344.958740234375, 58.31966018676758, 344.5110168457031, 282.35491943359375, -77.5024185180664, 467.220947265625, 152.80801391601562, 489.37677001953125, 742.99755859375, 655.9697265625, -187.8620147705078, 45.45594787597656, -16.32421875, 34.26177978515625, 563.3865966796875, 792.95751953125, -45.723121643066406, 279.23388671875, 105.52271270751953, 77.50438690185547, 117.8721923828125, 132.46627807617188, 11.31671142578125, 321.5757141113281, 86.84364318847656, 141.35220336914062, 193.6385498046875, 54.84917068481445, 999.8045654296875, -91.3493881225586, 526.8383178710938, 223.93942260742188, 453.7261657714844, 230.1559295654297, 33.340576171875, 73.26190185546875, 23.13873291015625, 275.7335205078125, 49.921653747558594, -9.11428451538086, -29.812599182128906, 381.32293701171875, 304.7782287597656, 621.16650390625, -57.422340393066406, 365.4970703125, 332.0007629394531, 928.3179931640625, -6.521137237548828, 578.2463989257812, 84.36373138427734, -34.76858139038086, 77.22224426269531, 546.17724609375, -130.77000427246094, 209.98504638671875, 573.1988525390625, 150.38697814941406, 193.80186462402344, 312.26861572265625, -11.645263671875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000652.npy"}
|
|
{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 238.45352172851562, "std": 275.5760498046875, "min": -263.001220703125, "p10": -69.70762023925779, "median": 192.2409896850586, "p90": 624.963299560547, "max": 1029.3409423828125, "pos_frac": 0.796875, "sample": [350.55908203125, 4.429718017578125, 75.91682434082031, 1029.3409423828125, 386.94403076171875, -93.15970611572266, 76.93870544433594, 17.0009765625, -50.0611572265625, 401.9757080078125, 690.42236328125, 250.9407196044922, -228.37347412109375, 549.9632568359375, 438.4727478027344, 363.7271728515625, 381.7719421386719, 458.0505065917969, 640.975830078125, 99.293701171875, 42.525516510009766, 172.88246154785156, 532.8572387695312, 255.5426025390625, 273.1025390625, 214.48509216308594, 632.2322387695312, 70.00591278076172, 154.43557739257812, -263.001220703125, 675.2473754882812, 202.90016174316406, 479.71295166015625, -243.78427124023438, 205.13491821289062, -17.822189331054688, 138.02352905273438, 387.3919372558594, -18.35515594482422, 719.935302734375, 919.7512817382812, 509.43768310546875, -28.053972244262695, -27.73810577392578, 608.00244140625, 342.9090576171875, 19.095001220703125, 82.22566223144531, 165.83255004882812, 181.58181762695312, -78.12753295898438, 129.5411834716797, 599.8056030273438, 146.92532348632812, 234.01321411132812, 124.80772399902344, -25.226905822753906, 76.7955322265625, 419.12091064453125, 216.2875518798828, 235.18771362304688, -102.07008361816406, 165.4298095703125, -113.08946228027344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000653.npy"}
|
|
{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 212.80287170410156, "std": 272.211181640625, "min": -234.30233764648438, "p10": -100.17584075927735, "median": 180.3289031982422, "p90": 527.4308013916017, "max": 1161.8070068359375, "pos_frac": 0.765625, "sample": [-2.2399673461914062, 482.59600830078125, -97.71771240234375, 253.584228515625, 142.58009338378906, 62.08021545410156, -196.9853515625, 216.9610595703125, -30.711193084716797, 496.0169982910156, 224.70791625976562, 96.11946868896484, 190.2436065673828, 171.885498046875, 122.2720947265625, -139.32562255859375, 36.12580108642578, 714.8072509765625, 315.4244689941406, 404.7273864746094, 216.72703552246094, 1161.8070068359375, 282.183349609375, 156.15264892578125, 371.0337219238281, 349.12255859375, -99.95274353027344, 353.64605712890625, 38.145050048828125, -68.02801513671875, -41.74528503417969, 366.5240478515625, 334.2421875, 219.81222534179688, 34.727752685546875, -11.997306823730469, 667.7255249023438, -81.78207397460938, 131.68331909179688, 313.5489807128906, 597.3049926757812, 258.31732177734375, 480.154052734375, 1.3909664154052734, -175.38853454589844, 429.1037292480469, 188.77230834960938, 482.3845520019531, 563.7677612304688, 90.88768005371094, 130.24407958984375, 540.8938598632812, 375.1331481933594, 68.6741714477539, 153.1015625, -157.14776611328125, 450.5130310058594, -100.27145385742188, 949.3543701171875, 332.9516296386719, 131.50991821289062, -101.97503662109375, -234.30233764648438, 7.280906677246094], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000654.npy"}
|
|
{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 225.6510009765625, "std": 296.91571044921875, "min": -295.1260070800781, "p10": -124.40712127685545, "median": 181.9922637939453, "p90": 731.530633544922, "max": 989.080810546875, "pos_frac": 0.78125, "sample": [-226.76467895507812, -236.99925231933594, 754.4701538085938, 290.25103759765625, 38.29993438720703, 839.4303588867188, 205.055908203125, 561.7637939453125, 123.99322509765625, 45.08592987060547, 321.72979736328125, 29.70522117614746, 51.25579833984375, -295.1260070800781, 48.291603088378906, 387.17645263671875, -64.6391830444336, 850.4534912109375, -28.629562377929688, 583.6795654296875, 96.97021484375, 238.89205932617188, 580.0985107421875, 187.83547973632812, -8.583215713500977, 122.68720245361328, 246.30821228027344, 175.14810180664062, -140.38706970214844, 179.93682861328125, -89.6137924194336, 822.1915893554688, 195.6386260986328, 57.7469482421875, 428.88970947265625, 298.6488342285156, -131.89395141601562, 465.48028564453125, 367.70831298828125, 409.3353576660156, 214.86456298828125, 118.76386260986328, 341.2870788574219, 989.080810546875, 748.0053100585938, 127.99525451660156, 184.04769897460938, -155.22488403320312, 298.6859436035156, 334.538330078125, -106.93785095214844, -14.15130615234375, 71.42071533203125, -90.90370178222656, 693.0897216796875, 11.252918243408203, 782.4454345703125, -174.09938049316406, 11.594772338867188, 386.8472900390625, 178.63525390625, 223.6411590576172, 134.24269104003906, 351.0206604003906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000655.npy"}
|
|
{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 316.3350830078125, "std": 266.9253845214844, "min": -120.64405059814453, "p10": 1.5380786895752019, "median": 253.60589599609375, "p90": 743.2682373046875, "max": 977.5941162109375, "pos_frac": 0.890625, "sample": [856.9788818359375, 265.44219970703125, -120.64405059814453, -10.235950469970703, 160.810546875, 243.359130859375, 157.77638244628906, 431.4959716796875, 227.2360076904297, 398.3919982910156, 427.79022216796875, 302.98077392578125, 270.958740234375, 245.26901245117188, 158.76075744628906, 666.9029541015625, 62.9862060546875, 119.51065063476562, 585.8781127929688, 183.1904754638672, 442.76513671875, -1.2904052734375, 689.0139770507812, -94.04586791992188, -79.56008911132812, 113.44971466064453, 504.5115966796875, 724.535888671875, 311.45001220703125, 789.1140747070312, 163.3245849609375, 977.5941162109375, 261.9427795410156, -18.365283966064453, 832.4131469726562, 428.6236572265625, 352.5751037597656, 344.9100646972656, 751.29638671875, 155.91806030273438, 892.4862060546875, 119.13575744628906, 616.2839965820312, 159.6561279296875, 172.20327758789062, 519.6439819335938, 8.137874603271484, 771.6807861328125, 129.63873291015625, 461.6311340332031, 600.4118041992188, 132.16275024414062, -36.595558166503906, 172.4106903076172, 448.57330322265625, 241.60525512695312, 17.122421264648438, 75.04046630859375, 91.45331573486328, 244.1796417236328, 318.18994140625, 274.36566162109375, 212.41091918945312, 318.6008605957031], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000656.npy"}
|
|
{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 295.7341003417969, "std": 271.88336181640625, "min": -147.03350830078125, "p10": -14.415662574768053, "median": 261.1901092529297, "p90": 614.5281188964846, "max": 1048.3612060546875, "pos_frac": 0.875, "sample": [222.09091186523438, 16.208171844482422, 52.96954345703125, 269.1964111328125, 579.2802734375, 1043.948974609375, 322.34930419921875, 93.27568054199219, 502.0123291015625, -142.71435546875, -109.20663452148438, 228.82972717285156, 401.720458984375, 20.012527465820312, 307.52178955078125, 402.93511962890625, 38.93760681152344, 382.4625549316406, 74.73571014404297, 629.6343383789062, 766.51171875, 454.91864013671875, 123.55435943603516, 1014.864013671875, 568.2645263671875, 734.5173950195312, 135.66676330566406, 135.63409423828125, 434.26611328125, 366.500244140625, 244.64773559570312, 447.9088439941406, 24.672710418701172, -87.87702941894531, 402.284423828125, 498.9083251953125, 262.7899169921875, -2.1723556518554688, 132.13916015625, 190.80419921875, 251.63064575195312, -70.31716918945312, -30.59294319152832, 259.5903015136719, 740.5045166015625, 104.5223159790039, 162.7215118408203, 368.3095703125, 385.51873779296875, 105.53968048095703, 534.3567504882812, 530.4860229492188, 309.94659423828125, 406.33349609375, 258.78839111328125, 194.20767211914062, 440.1095886230469, 1048.3612060546875, 283.5768127441406, -147.03350830078125, 99.71700286865234, -19.66279411315918, 229.66510009765625, 295.69873046875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000657.npy"}
|
|
{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 222.88583374023438, "std": 227.65850830078125, "min": -248.96890258789062, "p10": -85.17637023925779, "median": 217.4515151977539, "p90": 464.0990478515625, "max": 932.5154418945312, "pos_frac": 0.859375, "sample": [173.96334838867188, 124.23727416992188, 9.555328369140625, -120.19972229003906, 0.4120979309082031, 96.16732788085938, 462.90045166015625, 175.4853515625, -66.62397766113281, 384.10260009765625, 296.3211669921875, 735.195068359375, 127.85105895996094, 343.0739440917969, 146.40322875976562, -46.29521179199219, 88.69889831542969, 214.81875610351562, 112.32672882080078, -93.12739562988281, 459.3637390136719, 84.11968231201172, 336.6787109375, 138.20730590820312, -243.7122039794922, 388.4404602050781, 156.79757690429688, 303.9941711425781, 488.953369140625, -113.49518585205078, 33.92274475097656, 83.3436279296875, 47.566402435302734, 512.0810546875, 247.99070739746094, 932.5154418945312, 38.362571716308594, 464.61273193359375, 240.64028930664062, 127.42709350585938, 221.1448516845703, 226.48043823242188, 451.1549987792969, 308.23944091796875, 407.62445068359375, 385.34765625, 447.2328796386719, 270.7043762207031, 295.168212890625, 416.15728759765625, 229.8369598388672, 394.7989196777344, -94.8941650390625, 655.9102172851562, 202.8248291015625, 339.3538513183594, 357.27691650390625, 623.244384765625, -248.96890258789062, 220.0842742919922, 140.9533233642578, 145.82009887695312, 98.87764739990234, -122.75696563720703], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000658.npy"}
|
|
{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 233.79193115234375, "std": 256.9798889160156, "min": -231.36436462402344, "p10": -62.39492263793943, "median": 195.9837646484375, "p90": 558.0632263183594, "max": 1085.281005859375, "pos_frac": 0.8125, "sample": [402.7525329589844, -24.306121826171875, 117.15121459960938, 196.69068908691406, 198.9986114501953, 69.18418884277344, 593.0225830078125, 11.34442138671875, -231.36436462402344, -81.60554504394531, 770.8532104492188, 176.36123657226562, 357.7180480957031, -169.1323699951172, 305.48846435546875, 7.754692077636719, 329.3155517578125, 218.57568359375, 266.336181640625, 149.14981079101562, 152.2768096923828, 276.2578125, -166.6237335205078, 1085.281005859375, 10.903099060058594, 797.7536010742188, 303.1592712402344, 31.807701110839844, 691.0902099609375, 341.58233642578125, 228.02743530273438, 325.23553466796875, -1.2248878479003906, 139.67967224121094, 492.810791015625, 161.3947296142578, 302.1563720703125, 87.21551513671875, 40.24458312988281, 418.4166259765625, -19.593338012695312, -42.081993103027344, 340.3905029296875, -106.30789184570312, 532.7432250976562, -3.1680030822753906, 4.467140197753906, 186.60885620117188, 547.6129150390625, 712.0346069335938, 562.5419311523438, 161.4586944580078, 141.0494384765625, 318.6501159667969, 252.99761962890625, 195.27684020996094, 426.35260009765625, 429.3932189941406, 474.2300720214844, -91.47483825683594, 324.8330078125, 135.2110137939453, -71.1004638671875, 168.8243408203125], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000659.npy"}
|
|
{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 267.2454528808594, "std": 247.10829162597656, "min": -262.08209228515625, "p10": -5.877524185180654, "median": 283.8490753173828, "p90": 527.9908996582031, "max": 1017.681884765625, "pos_frac": 0.890625, "sample": [55.24238586425781, 215.35935974121094, 321.5257568359375, 347.4762268066406, 91.46958923339844, 279.4961242675781, 448.44598388671875, 303.7064208984375, 354.1130676269531, 389.5232849121094, 188.39060974121094, 3.4283790588378906, 360.1114196777344, 318.08319091796875, 308.36541748046875, 845.0293579101562, 98.69764709472656, -50.33662414550781, 293.95849609375, 327.7994079589844, 293.7814636230469, 341.44146728515625, 126.0971450805664, 299.1477355957031, 529.9315795898438, 774.943603515625, -70.27735137939453, -110.84966278076172, 406.8677062988281, 407.9035339355469, 3.9081954956054688, 625.411376953125, -106.89886474609375, 921.7783203125, 100.35629272460938, 344.6931457519531, 500.3784484863281, 95.57914733886719, 264.3092041015625, 202.7104949951172, 139.0114288330078, 67.76896667480469, 321.1199645996094, 326.8984375, 7.554901123046875, 79.58706665039062, 95.00281524658203, 1017.681884765625, 523.412353515625, 375.67364501953125, -9.865768432617188, 271.83416748046875, -262.08209228515625, 567.3441162109375, 523.462646484375, 194.14292907714844, 212.58596801757812, 205.68368530273438, 288.2020263671875, 431.88543701171875, 212.97064208984375, -210.91522216796875, 48.48255157470703, 225.16851806640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000660.npy"}
|
|
{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 247.49441528320312, "std": 280.6598815917969, "min": -484.42535400390625, "p10": -4.32097015380859, "median": 225.26585388183594, "p90": 541.4868438720705, "max": 1291.995361328125, "pos_frac": 0.875, "sample": [104.23922729492188, 75.89302825927734, 373.0769348144531, 71.98652648925781, 182.22036743164062, 39.04799270629883, 340.9903259277344, -141.37380981445312, 346.54156494140625, 348.56207275390625, 557.388427734375, 348.6258544921875, 133.16090393066406, 664.2230224609375, 1291.995361328125, 269.62274169921875, 165.90133666992188, -0.497406005859375, -113.12197875976562, 68.51681518554688, 179.87449645996094, 408.36474609375, 124.18966674804688, 186.35855102539062, 45.465782165527344, 237.6737518310547, 165.2094268798828, 451.9853210449219, 752.9951171875, 39.20063781738281, 8.747177124023438, 504.3831481933594, 40.84417724609375, 560.3780517578125, 268.2178649902344, 168.7570343017578, 309.88690185546875, 251.70254516601562, 194.61935424804688, 142.75946044921875, -236.3807830810547, 247.32740783691406, -5.9596405029296875, 461.3128662109375, 334.6412048339844, -157.1005859375, 373.5513000488281, 244.02120971679688, 12.396623611450195, 457.1971435546875, 943.154541015625, -484.42535400390625, 212.8579559326172, -24.738176345825195, 436.6184997558594, 460.2098083496094, 71.26549530029297, 437.8598937988281, 10.630922317504883, 846.9835205078125, 416.4688720703125, 347.3136291503906, 239.11659240722656, 26.707008361816406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000661.npy"}
|
|
{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 209.2576141357422, "std": 270.2919921875, "min": -355.88739013671875, "p10": -177.46720123291013, "median": 218.40860748291016, "p90": 577.8447631835938, "max": 800.318359375, "pos_frac": 0.78125, "sample": [92.67581939697266, 231.1895751953125, 697.2664794921875, -355.88739013671875, 478.7403259277344, 27.499954223632812, 353.6947021484375, 202.06427001953125, 795.0858764648438, 234.37789916992188, 501.8960266113281, -76.85809326171875, -188.56227111816406, 95.87779235839844, 420.1005554199219, 57.747825622558594, 160.52037048339844, 146.62644958496094, 800.318359375, 306.4812316894531, 583.2651977539062, -17.364013671875, 288.70318603515625, 28.319210052490234, 164.42276000976562, 402.4373779296875, 138.9076385498047, 666.8701171875, 103.36880493164062, -44.00732421875, 224.3712158203125, -228.81011962890625, 257.39630126953125, -151.57870483398438, 186.7749786376953, 411.6324462890625, 205.11248779296875, 387.70477294921875, 503.6981506347656, 366.67919921875, 34.77140808105469, 212.4459991455078, 511.2219543457031, 589.7796630859375, 335.3997802734375, 458.951904296875, -258.10455322265625, 256.530029296875, 270.315185546875, 178.6655731201172, 49.39667510986328, 14.88582992553711, -44.10084533691406, 254.47991943359375, -273.3623046875, -78.00382232666016, 632.7598876953125, 565.1970825195312, 263.4482116699219, 258.6942138671875, 233.42645263671875, -260.736083984375, -196.1700439453125, -76.16377258300781], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000662.npy"}
|
|
{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 241.3779754638672, "std": 284.4305725097656, "min": -512.3572387695312, "p10": -31.580833435058572, "median": 202.93169403076172, "p90": 607.2178527832032, "max": 1234.923828125, "pos_frac": 0.875, "sample": [573.8427734375, 287.8787841796875, 485.8184814453125, 50.08885192871094, 365.7981262207031, 388.7493896484375, 86.21965026855469, 45.600250244140625, -41.014495849609375, 202.45953369140625, 121.70694732666016, 110.0418701171875, 553.1087036132812, 497.18280029296875, 56.32316589355469, 302.0653991699219, 390.5500793457031, 231.37603759765625, -286.4247741699219, 37.73211669921875, 172.16220092773438, 231.09075927734375, -102.71728515625, 187.23959350585938, 314.73760986328125, 300.728759765625, 302.6399841308594, 118.04949951171875, 366.463623046875, 1234.923828125, -196.3524627685547, 596.7093505859375, 901.6473388671875, -9.568954467773438, 301.78070068359375, 821.4129638671875, 611.7214965820312, 52.41128158569336, 52.018646240234375, 532.0223999023438, 642.247802734375, 213.82748413085938, 118.33056640625, 152.198974609375, 220.75823974609375, 289.3299560546875, 203.4038543701172, 139.86915588378906, 190.57861328125, 748.5028076171875, 271.41357421875, 126.89315032958984, 623.3848266601562, -512.3572387695312, 362.47662353515625, 36.07279586791992, 9.242279052734375, -72.61164093017578, 216.8988800048828, 77.6485366821289, 22.365859985351562, 72.22136688232422, -51.686676025390625, 98.95565032958984], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000663.npy"}
|
|
{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 275.0943298339844, "std": 311.2858581542969, "min": -714.898681640625, "p10": -36.33983612060546, "median": 223.981201171875, "p90": 702.1109130859376, "max": 1116.51416015625, "pos_frac": 0.84375, "sample": [657.9041137695312, 157.01986694335938, 267.5700378417969, 691.7188110351562, 519.6796875, 223.02371215820312, 199.2125244140625, 626.215087890625, -30.836692810058594, 810.1997680664062, -192.29345703125, 662.6526489257812, 276.0460205078125, 77.98101043701172, 714.4841918945312, 98.73994445800781, -195.22848510742188, 357.43072509765625, 74.87528228759766, 331.470947265625, 365.8856506347656, 560.27734375, -4.11407470703125, 747.7666625976562, -150.45333862304688, 80.67221069335938, 705.304931640625, 54.573917388916016, 124.01031494140625, -165.77853393554688, 694.658203125, 245.40951538085938, -71.32244873046875, 282.92181396484375, 206.0029296875, 301.562255859375, -714.898681640625, 139.63461303710938, 569.3113403320312, 487.1504821777344, 120.03102111816406, 338.5880432128906, 218.40957641601562, 118.934326171875, 431.290771484375, 1116.51416015625, 422.94921875, 68.63008880615234, 737.3367919921875, 12.9149169921875, -29.29144287109375, 758.4129638671875, 399.4195251464844, 73.13276672363281, 185.21820068359375, 292.72637939453125, 224.93869018554688, 130.936767578125, 60.27702331542969, 72.58987426757812, 330.84912109375, 124.99624633789062, 648.4887084960938, -38.698326110839844], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000664.npy"}
|
|
{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 240.25119018554688, "std": 244.1471405029297, "min": -323.77532958984375, "p10": -54.2901885986328, "median": 196.16675567626953, "p90": 565.0394897460939, "max": 1031.093994140625, "pos_frac": 0.84375, "sample": [-79.46566009521484, 55.22060775756836, 304.0549011230469, 148.77694702148438, 144.11380004882812, 371.4950256347656, 483.086669921875, 91.2418212890625, 592.9197998046875, 198.4650421142578, 292.5252990722656, 378.35089111328125, -68.69284057617188, 353.827880859375, 237.45584106445312, 33.730224609375, -69.51983642578125, 227.93142700195312, 110.93293762207031, 373.43115234375, 392.11083984375, -111.71684265136719, 339.8904113769531, 333.0683898925781, -59.21665954589844, 91.66704559326172, -323.77532958984375, 690.93115234375, 572.7279052734375, -11.482166290283203, 136.6829833984375, 129.97537231445312, 446.935546875, 477.1260070800781, 547.099853515625, 140.41213989257812, 505.68365478515625, 449.2174072265625, 118.61556243896484, 222.8546142578125, 140.77587890625, 645.324951171875, 451.8185119628906, -235.58355712890625, 193.86846923828125, 638.6312255859375, 21.85852813720703, 164.70001220703125, 247.31613159179688, 344.5942687988281, 147.130859375, 258.71441650390625, 410.4795837402344, 72.31292724609375, 690.4512939453125, -42.79508972167969, 137.76414489746094, -1.8880691528320312, 158.69375610351562, 338.3167419433594, 1031.093994140625, 75.92607116699219, 178.74737548828125, 39.134944915771484], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000665.npy"}
|
|
{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 236.42100524902344, "std": 237.9355926513672, "min": -411.48333740234375, "p10": -69.91269149780273, "median": 232.4415054321289, "p90": 512.1518981933593, "max": 755.5458374023438, "pos_frac": 0.84375, "sample": [430.30560302734375, 99.11393737792969, 510.1805419921875, 284.418701171875, -254.35020446777344, 316.3133544921875, 512.9967651367188, 392.76165771484375, 103.1935806274414, 704.3414306640625, -21.980567932128906, 146.14395141601562, 220.28237915039062, -88.29010772705078, 161.85003662109375, 82.41539001464844, 372.8629150390625, 5.069694519042969, -57.799468994140625, 176.50674438476562, 622.0614624023438, 160.56268310546875, 416.62359619140625, 237.4061279296875, 755.5458374023438, -47.52912902832031, 442.05499267578125, 196.40443420410156, -75.10407257080078, -411.48333740234375, 313.7566223144531, 320.35308837890625, 199.94403076171875, 660.5463256835938, 486.8015441894531, 289.08367919921875, 488.03179931640625, -77.89837646484375, 269.03765869140625, 391.1272277832031, 86.95460510253906, 218.17050170898438, -134.7803497314453, 615.5863647460938, 136.2357940673828, 267.7914123535156, -317.5530700683594, 465.0065612792969, 227.4768829345703, 94.4085693359375, 308.2113037109375, 333.38348388671875, 276.4857482910156, 60.73062515258789, 157.12322998046875, 102.12234497070312, 185.18374633789062, 318.0660705566406, 571.6009521484375, 216.0581817626953, 91.88545989990234, 397.2802734375, 253.63674926757812, 466.2160339355469], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000666.npy"}
|
|
{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 236.74664306640625, "std": 228.3551788330078, "min": -296.2386474609375, "p10": -39.6764663696289, "median": 234.09747314453125, "p90": 577.9815551757813, "max": 765.0818481445312, "pos_frac": 0.84375, "sample": [161.73922729492188, -41.98218536376953, 307.7280578613281, -296.2386474609375, 54.871002197265625, 316.6911315917969, 246.41357421875, 330.1195373535156, 349.487060546875, 464.86004638671875, 197.61488342285156, 76.87652587890625, 96.55350494384766, 213.3293914794922, 180.71554565429688, -73.78099822998047, 269.9013671875, 221.7813720703125, 754.4352416992188, 205.5266876220703, 410.05517578125, 164.51585388183594, 598.95068359375, 187.50799560546875, 382.2295837402344, 248.19839477539062, 368.1733703613281, 54.70740509033203, 443.53125, 344.3034973144531, 166.02285766601562, 293.6116943359375, 608.6322631835938, 430.239013671875, 258.82122802734375, -32.337371826171875, 87.02850341796875, 279.01904296875, 303.9131774902344, -34.29645538330078, -103.87513732910156, 743.538818359375, 598.8261108398438, 6.3804473876953125, 578.3826293945312, 577.0457153320312, 278.5538635253906, 125.03582000732422, 155.66311645507812, 348.3649597167969, -21.421180725097656, 279.3444519042969, -228.74327087402344, 402.4207458496094, 765.0818481445312, 179.30401611328125, 80.37003326416016, 381.3057556152344, -103.9720230102539, 120.31759643554688, -161.2236785888672, 83.71795654296875, 178.57257080078125, 289.3240966796875], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000667.npy"}
|
|
{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 221.6362762451172, "std": 271.86480712890625, "min": -343.49737548828125, "p10": -95.46155624389648, "median": 189.19149017333984, "p90": 598.4747497558596, "max": 949.9822387695312, "pos_frac": 0.8125, "sample": [411.8967590332031, 215.80812072753906, 942.6273193359375, -343.49737548828125, 9.233501434326172, 332.8724670410156, 432.7035827636719, 112.57679748535156, 633.6068115234375, 184.05477905273438, 52.42441940307617, -116.63252258300781, 217.3558349609375, 125.29664611816406, 291.4454345703125, 360.28887939453125, 194.3282012939453, 32.41932678222656, -88.64488983154297, 62.435665130615234, -150.60467529296875, 371.801025390625, -96.44934844970703, 443.7401123046875, 258.2333984375, 177.48330688476562, 68.72976684570312, 108.96914672851562, 21.606613159179688, 452.95135498046875, 239.3871612548828, 294.5050964355469, 270.10791015625, -93.15670776367188, 545.0647583007812, 174.14906311035156, -212.29495239257812, 471.5074462890625, 145.9107666015625, 36.702369689941406, 278.69244384765625, 366.46893310546875, 621.36474609375, 116.12036895751953, -303.8553771972656, 663.5140991210938, 172.83184814453125, -136.1319122314453, 150.3916015625, -5.772451400756836, 324.41668701171875, 84.24242401123047, 202.2459716796875, 249.69100952148438, -46.566741943359375, 528.9921264648438, 323.9861755371094, 366.13226318359375, 949.9822387695312, 133.9541473388672, -15.589072227478027, 737.5636596679688, 49.401947021484375, 781.701416015625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000668.npy"}
|
|
{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 264.0494384765625, "std": 273.9309997558594, "min": -478.85882568359375, "p10": -55.13373870849609, "median": 257.9930877685547, "p90": 599.2013305664063, "max": 1094.07080078125, "pos_frac": 0.859375, "sample": [229.7698516845703, 41.51457977294922, -17.575098037719727, 453.40057373046875, -282.1207275390625, 550.79150390625, 314.770263671875, 415.99755859375, 112.84227752685547, -48.295494079589844, 1094.07080078125, -60.965213775634766, 20.39166259765625, 254.46380615234375, -96.32134246826172, 272.6189880371094, 646.564697265625, -58.064414978027344, 130.9232177734375, 278.8806457519531, 417.474609375, 323.5391845703125, 828.9183349609375, 250.6330108642578, 187.1165313720703, 609.7994384765625, 109.487060546875, 488.905029296875, 640.6544189453125, 95.54251098632812, 305.08935546875, 754.1025390625, 161.05023193359375, 511.9164733886719, 682.13134765625, -444.91815185546875, 51.370182037353516, 316.51861572265625, 544.7440185546875, 339.2093505859375, 221.078125, 228.258056640625, 62.456573486328125, 218.50662231445312, -478.85882568359375, 378.7595520019531, 282.6566162109375, 574.472412109375, 284.832763671875, 230.7608642578125, -70.4421157836914, 348.1173400878906, 156.48095703125, 194.01150512695312, 363.5068054199219, 379.05242919921875, 39.44999694824219, 261.5223693847656, 388.1866149902344, 463.3309631347656, 140.64236450195312, 404.0472412109375, 172.0555877685547, 229.33750915527344], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000669.npy"}
|
|
{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 280.8297119140625, "std": 283.3674011230469, "min": -363.8268127441406, "p10": -42.611203002929685, "median": 258.00665283203125, "p90": 710.0275634765625, "max": 916.046875, "pos_frac": 0.84375, "sample": [170.9420166015625, 338.8302001953125, 215.59298706054688, 710.9738159179688, 278.8940124511719, 11.119573593139648, -202.24819946289062, 65.26692199707031, 450.1560974121094, 619.4959106445312, 189.6099395751953, 24.85162353515625, -363.8268127441406, 794.6212768554688, 541.20068359375, 761.8773803710938, 444.19024658203125, 220.75262451171875, 598.530517578125, 151.74374389648438, 224.3666534423828, -244.00546264648438, 731.4822387695312, -24.20545196533203, 290.4808349609375, 291.330078125, 264.3314514160156, 141.602294921875, 298.8302001953125, -43.249298095703125, 599.7753295898438, 579.5947265625, -71.55604553222656, 258.7733459472656, 261.14642333984375, 847.6369018554688, 480.07110595703125, 9.170581817626953, 366.6929626464844, 549.1963500976562, -4.048992156982422, 707.8196411132812, 227.9033660888672, -41.122314453125, 916.046875, 370.208740234375, 345.1507568359375, 249.3600616455078, -198.7742462158203, 7.799079895019531, 139.25241088867188, 771.6634521484375, 113.51348876953125, 61.73040771484375, 339.68902587890625, 108.9661636352539, 226.27261352539062, 536.4998168945312, 94.83575439453125, 359.745361328125, 185.00328063964844, 414.7611083984375, -50.452392578125, 257.2399597167969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000670.npy"}
|
|
{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 239.1703643798828, "std": 268.3020324707031, "min": -354.8358459472656, "p10": -100.13097686767576, "median": 255.10458374023438, "p90": 614.915399169922, "max": 836.8930053710938, "pos_frac": 0.828125, "sample": [31.903427124023438, -302.43536376953125, 184.30706787109375, 175.16220092773438, -28.65338134765625, 444.14532470703125, 184.58718872070312, 282.500732421875, 221.29522705078125, 481.468017578125, 280.87762451171875, 1.3388900756835938, 266.97576904296875, 416.3913879394531, 313.7618103027344, -11.784194946289062, 61.30043411254883, 250.46890258789062, 519.2495727539062, 302.35260009765625, 5.800634384155273, 836.8930053710938, -354.8358459472656, 699.6707763671875, 524.8496704101562, 163.23098754882812, 725.1343383789062, 348.6736755371094, 325.9521484375, 292.9547424316406, 62.999942779541016, -74.7957534790039, -138.5401611328125, 259.7402648925781, 481.8793029785156, -110.98892974853516, 309.6263427734375, 235.29043579101562, 503.9677734375, 387.052734375, 568.68408203125, 141.62338256835938, 17.199996948242188, 128.41650390625, 470.6675720214844, 80.75880432128906, 330.134765625, 735.7682495117188, -165.15194702148438, -29.10962677001953, 403.8198547363281, 300.2555236816406, 20.96443748474121, 30.81829833984375, 422.6017150878906, 128.1538543701172, -229.57797241210938, -171.34878540039062, 128.1228485107422, 656.6552124023438, 87.15550994873047, 753.063232421875, 634.7288208007812, 302.7295227050781], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000671.npy"}
|
|
{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 218.25674438476562, "std": 270.58056640625, "min": -369.7100830078125, "p10": -111.47366409301758, "median": 201.03008270263672, "p90": 560.0447570800782, "max": 919.3651123046875, "pos_frac": 0.78125, "sample": [53.12901306152344, -87.62933349609375, -105.38726043701172, 171.31475830078125, -60.15446853637695, 107.49397277832031, 496.39190673828125, 285.81256103515625, 585.0228271484375, 112.02030181884766, 39.85643768310547, 139.19631958007812, -129.20883178710938, 45.42298126220703, 268.01025390625, -44.89147186279297, 71.19143676757812, -173.9470977783203, 373.77996826171875, 445.89630126953125, 75.28219604492188, 553.40966796875, 28.718910217285156, -114.08212280273438, 454.99847412109375, -25.67469024658203, 773.2677001953125, -184.99200439453125, 269.7381591796875, 919.3651123046875, -53.97062683105469, 681.4552001953125, 670.5189208984375, 27.65606689453125, 562.8883666992188, 399.4639892578125, 230.7454071044922, 291.5906982421875, 239.20062255859375, 545.92724609375, -198.51953125, 75.1988296508789, 269.04058837890625, 457.1050109863281, 355.20465087890625, 411.42938232421875, 122.7699966430664, 162.0164794921875, 307.693603515625, 163.08148193359375, 370.7898864746094, 288.2845153808594, -29.25689697265625, 361.5561218261719, 82.43804931640625, 113.28518676757812, 388.19781494140625, 261.29705810546875, 162.6054229736328, -369.7100830078125, 357.593994140625, 479.92169189453125, 735.73828125, -298.1578674316406], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000672.npy"}
|
|
{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 264.72637939453125, "std": 215.31802368164062, "min": -190.32156372070312, "p10": -13.527724456787098, "median": 264.6934814453125, "p90": 541.1322387695313, "max": 786.9127807617188, "pos_frac": 0.875, "sample": [-18.072132110595703, -2.9241065979003906, 236.65248107910156, 393.4254150390625, 133.73956298828125, 432.6466064453125, 280.3122253417969, 253.08290100097656, 566.9228515625, 301.5181579589844, -153.68406677246094, 568.6944580078125, 284.1542053222656, 617.1248779296875, 255.96339416503906, 444.4744873046875, -149.12493896484375, 326.9530944824219, 786.9127807617188, 264.39202880859375, 151.8336944580078, 264.99493408203125, 285.0606994628906, -31.295333862304688, 359.15142822265625, -52.22901153564453, 224.99765014648438, 345.06573486328125, 13.350496292114258, 67.67829895019531, 427.0845031738281, 480.48486328125, 176.9291229248047, 215.52711486816406, 325.56976318359375, 554.5961303710938, 324.983154296875, 91.36082458496094, 482.52191162109375, 339.8533020019531, 408.202392578125, -32.271873474121094, 474.445556640625, 415.545654296875, 188.0257568359375, 58.748878479003906, 24.46827507019043, 6.956544876098633, 356.67596435546875, 781.7401123046875, 174.44961547851562, 152.40383911132812, 398.0689697265625, 440.1209411621094, -190.32156372070312, 353.49224853515625, 226.14402770996094, 509.71649169921875, 198.088623046875, 44.137664794921875, 57.001747131347656, 171.28900146484375, 630.3304443359375, 224.34410095214844], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000673.npy"}
|
|
{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 270.78631591796875, "std": 263.3953552246094, "min": -212.7404022216797, "p10": -41.22509002685544, "median": 263.4559783935547, "p90": 604.6863220214846, "max": 1172.610107421875, "pos_frac": 0.859375, "sample": [100.68234252929688, 44.4670524597168, 425.91107177734375, 331.67462158203125, 300.0259704589844, 469.1602783203125, 132.69679260253906, 136.9260711669922, -159.22210693359375, 672.9899291992188, 408.0042419433594, 663.3731689453125, -212.7404022216797, 563.1494140625, -52.25685119628906, 111.73975372314453, -15.484315872192383, 859.5597534179688, 387.7994079589844, 1172.610107421875, 348.6669921875, 207.85333251953125, -77.31614685058594, 518.8009033203125, 214.04885864257812, 65.95477294921875, 432.2328796386719, 316.4567565917969, 319.7586669921875, 63.69696044921875, 301.92535400390625, 790.0297241210938, 69.53262329101562, 319.54034423828125, 266.12567138671875, 521.8602905273438, -88.79788208007812, -11.391105651855469, 164.41404724121094, 511.2296447753906, 255.17581176757812, 234.3331298828125, 410.5411376953125, 378.4686584472656, 33.045166015625, 622.4878540039062, 413.8603820800781, 340.7918701171875, 163.41709899902344, 270.7137145996094, -187.73394775390625, 50.24093246459961, 666.3505249023438, -209.94607543945312, 147.8145751953125, 362.28076171875, 215.67791748046875, 466.70721435546875, 260.7862854003906, 311.72503662109375, 222.47079467773438, 5.255998611450195, 154.447998046875, 145.72120666503906], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000674.npy"}
|
|
{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 286.111572265625, "std": 282.7879943847656, "min": -176.82516479492188, "p10": -36.6676544189453, "median": 263.2550964355469, "p90": 660.3961791992189, "max": 1222.211669921875, "pos_frac": 0.84375, "sample": [53.95075988769531, 367.655517578125, 366.8067321777344, 150.3109588623047, 195.05328369140625, 131.9730987548828, 581.1199951171875, -6.798736572265625, 288.3990783691406, 304.50262451171875, 527.8561401367188, -176.82516479492188, 445.2337951660156, 767.1233520507812, 17.04914093017578, -18.892913818359375, 7.6221923828125, 640.9979248046875, 242.05929565429688, 263.7704162597656, 303.593994140625, 15.926322937011719, 1222.211669921875, 427.0924072265625, 52.98906707763672, 156.14923095703125, -103.13665771484375, 433.55084228515625, 332.3986511230469, 133.72389221191406, 361.43695068359375, 472.12799072265625, 298.26214599609375, 535.198974609375, 11.882171630859375, 351.109619140625, 886.3457641601562, -62.58552551269531, 597.2962036132812, 756.926025390625, 159.1935272216797, -11.308013916015625, 20.515106201171875, 262.7397766113281, 312.91375732421875, -44.285400390625, -141.3142547607422, 245.14566040039062, 461.9588317871094, 327.1716613769531, 158.02601623535156, 615.74462890625, 231.58535766601562, -102.482177734375, -123.07198333740234, 244.6871795654297, 175.4869384765625, 353.23675537109375, 277.3587646484375, 668.709716796875, 83.94931030273438, 857.6197509765625, 750.1930541992188, 195.89950561523438], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000675.npy"}
|
|
{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 251.72601318359375, "std": 241.6733856201172, "min": -295.62109375, "p10": -21.516743850708, "median": 249.77208709716797, "p90": 554.9471496582032, "max": 793.761474609375, "pos_frac": 0.859375, "sample": [192.47064208984375, 561.6909790039062, 379.37103271484375, 354.18499755859375, 420.296630859375, -50.755653381347656, 453.4905700683594, 248.72305297851562, -36.77589797973633, -295.62109375, 66.71234130859375, 47.301910400390625, 38.56620788574219, 477.3963317871094, 539.2115478515625, 735.6438598632812, 217.68197631835938, 250.8211212158203, 128.9824981689453, -24.262386322021484, 43.040687561035156, 42.72233581542969, 407.6423034667969, -6.341644287109375, 21.650726318359375, 361.8006286621094, 87.16421508789062, 343.24822998046875, 139.38375854492188, 793.761474609375, 80.48318481445312, 380.0628356933594, 209.59793090820312, 476.7504577636719, -69.08159637451172, 126.50335693359375, 597.9346313476562, 257.2064208984375, 269.23992919921875, 792.2750854492188, 431.9073486328125, 109.47306060791016, 63.83921813964844, 79.85444641113281, 452.8006286621094, -80.90664672851562, 30.631622314453125, 479.404296875, 148.5513916015625, 145.51605224609375, 673.9080200195312, 393.78167724609375, 381.568115234375, 357.484619140625, 67.2284927368164, 369.56500244140625, 632.0946655273438, -263.94476318359375, 493.74603271484375, 60.70335388183594, 306.2591552734375, 347.302734375, -15.110244750976562, 384.6302490234375], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000676.npy"}
|
|
{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 237.49810791015625, "std": 216.1044158935547, "min": -245.17745971679688, "p10": -40.12224731445312, "median": 214.01729583740234, "p90": 504.6497772216797, "max": 705.70556640625, "pos_frac": 0.796875, "sample": [-8.327445983886719, 705.70556640625, 226.13803100585938, 93.04576873779297, 180.92861938476562, 131.42938232421875, -105.1859359741211, -45.707923889160156, 460.62933349609375, -43.49737548828125, 201.8965606689453, 376.556640625, 35.16789245605469, 504.9670715332031, 157.85646057128906, 443.12255859375, 456.3794860839844, -123.37651062011719, 44.07444763183594, 144.6611785888672, 503.909423828125, 415.9045715332031, 488.90533447265625, -70.28688049316406, 240.06637573242188, 433.47845458984375, 353.99835205078125, 299.7398986816406, 347.2119140625, 543.48388671875, 182.87318420410156, 176.86355590820312, -17.73525047302246, 174.37905883789062, 345.16009521484375, -245.17745971679688, 555.6343994140625, 169.13853454589844, 327.30059814453125, 172.902587890625, 344.6859130859375, 239.74365234375, 420.203369140625, 471.2857666015625, -10.000656127929688, 484.59210205078125, 128.4832763671875, 97.45599365234375, 616.798583984375, 42.94932556152344, 578.0075073242188, -28.294971466064453, 322.4891052246094, 141.68850708007812, 456.16064453125, 169.9736328125, -60.594757080078125, 580.9600830078125, 358.3471374511719, 322.822998046875, 252.63177490234375, -32.2469482421875, -19.28689956665039, 86.80874633789062], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000677.npy"}
|
|
{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 259.71539306640625, "std": 283.39495849609375, "min": -489.1426696777344, "p10": -56.183889007568354, "median": 282.53887939453125, "p90": 615.1205810546876, "max": 898.000732421875, "pos_frac": 0.84375, "sample": [63.237998962402344, 665.4219970703125, 390.86492919921875, 369.72784423828125, 223.9047393798828, -88.28298950195312, 421.1732177734375, -489.1426696777344, 61.0721435546875, 84.86087036132812, -57.706024169921875, 525.4446411132812, 101.18704223632812, 601.8619384765625, 71.73596954345703, 315.38739013671875, 358.1070251464844, -245.1278076171875, 306.54339599609375, 661.002685546875, 539.6702880859375, 39.42039489746094, 9.376348495483398, 795.2259521484375, 514.5503540039062, 860.5322265625, 274.30267333984375, 22.287399291992188, -13.19124984741211, 570.9785766601562, -142.82330322265625, -52.632240295410156, 55.322052001953125, 499.16552734375, 91.27517700195312, 539.406982421875, 107.49723052978516, 761.90673828125, 206.90444946289062, 360.81451416015625, -187.16441345214844, 356.3216552734375, 507.15679931640625, 379.52752685546875, 355.9442138671875, 47.42439651489258, 620.8028564453125, 898.000732421875, 141.11810302734375, 336.2347412109375, 385.7773742675781, -137.7845458984375, 311.021484375, 24.862442016601562, 59.50707244873047, 62.83982849121094, 136.4134063720703, 325.61376953125, 285.71490478515625, 279.36285400390625, 387.2635192871094, 70.9029541015625, 598.5675048828125, -4.908073425292969], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000678.npy"}
|
|
{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 216.7694549560547, "std": 256.2690734863281, "min": -267.078857421875, "p10": -70.82558670043944, "median": 212.29241943359375, "p90": 542.9815185546876, "max": 991.7736206054688, "pos_frac": 0.8125, "sample": [380.44671630859375, 63.55817413330078, 273.2156677246094, 648.1168212890625, 78.11038208007812, 460.95562744140625, 452.7415771484375, 524.2879638671875, 488.31207275390625, 224.27740478515625, 265.7691650390625, 385.3019104003906, 194.57789611816406, 42.98539733886719, 670.4722290039062, -109.05660247802734, 66.0361557006836, -11.719879150390625, 242.75082397460938, 579.5796508789062, 358.6533508300781, 802.3392333984375, -43.88392639160156, 27.993770599365234, 112.35130310058594, -216.01174926757812, 138.735107421875, 422.18048095703125, -90.43916320800781, 212.96957397460938, 262.2967834472656, 50.214599609375, -48.09886169433594, 325.7810363769531, 211.61526489257812, 43.9832763671875, 609.16796875, 123.83404541015625, -45.03973388671875, 21.491920471191406, 42.870697021484375, 14.087814331054688, 353.2588195800781, 107.80545043945312, 326.3966979980469, 423.42645263671875, 8.739480972290039, -67.40228271484375, -267.078857421875, 20.32979965209961, 550.9930419921875, -240.42413330078125, 195.23953247070312, 47.3109245300293, 330.1598205566406, 332.4203796386719, 300.14532470703125, 259.83905029296875, 386.04327392578125, 991.7736206054688, 267.4494934082031, 500.98748779296875, -139.68763732910156, -72.29271697998047], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000679.npy"}
|
|
{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 257.3198547363281, "std": 297.2369079589844, "min": -788.0159301757812, "p10": -20.09705276489257, "median": 194.4177017211914, "p90": 598.6830810546875, "max": 1074.1588134765625, "pos_frac": 0.875, "sample": [601.0656127929688, -24.969867706298828, 276.4747314453125, 481.2244873046875, 97.90074157714844, 485.8406982421875, 868.8558349609375, 269.25946044921875, 471.2471618652344, 193.2447509765625, 295.786376953125, 165.14524841308594, 195.5906524658203, 3.091888427734375, 490.7366638183594, 842.9857177734375, 192.16786193847656, -232.6495361328125, 11.778091430664062, 210.77615356445312, 845.767578125, 116.99799346923828, 30.883472442626953, 95.92668914794922, 45.988868713378906, 125.73194885253906, 150.24354553222656, 97.5907211303711, 328.98944091796875, 440.28057861328125, 165.30917358398438, 206.5381317138672, 106.45736694335938, 586.2319946289062, 474.765869140625, 95.02055358886719, 11.404678344726562, 463.955078125, 499.51263427734375, -100.57171630859375, 162.91665649414062, 729.4853515625, 323.04620361328125, 251.90866088867188, 136.9774932861328, -788.0159301757812, 504.9403381347656, -56.63983917236328, -12.048099517822266, 593.1238403320312, 276.57861328125, 1074.1588134765625, 13.550491333007812, 59.18721008300781, 118.24935913085938, 347.7630920410156, 455.48846435546875, 761.0621337890625, -23.54660415649414, 158.4481964111328, -66.37771606445312, 82.4560317993164, 237.44839477539062, 445.7318420410156], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000680.npy"}
|
|
{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 229.41253662109375, "std": 266.88043212890625, "min": -242.45822143554688, "p10": -85.23811874389646, "median": 223.32898712158203, "p90": 605.5920471191407, "max": 983.3009033203125, "pos_frac": 0.78125, "sample": [87.61311340332031, -223.01861572265625, 314.636962890625, 177.6739501953125, 513.0836181640625, 14.446617126464844, 283.7520446777344, 310.9261474609375, -91.30779266357422, 196.10394287109375, -54.97515869140625, 327.7833251953125, -157.06263732910156, -54.01512145996094, 386.9124755859375, 983.3009033203125, 256.1066589355469, -242.45822143554688, -23.345535278320312, 443.019287109375, 77.17974090576172, 0.9512100219726562, 32.30091094970703, 165.05035400390625, 446.671142578125, 318.99981689453125, 213.36221313476562, 618.5704345703125, 411.9816589355469, 416.9144287109375, 173.32302856445312, 380.046630859375, 277.26397705078125, 5.014007568359375, -18.55132293701172, 233.29576110839844, 28.837602615356445, -212.19232177734375, 381.0151062011719, -43.70701217651367, -123.54698181152344, 756.9310302734375, 61.72942352294922, 431.6618957519531, -164.12783813476562, 123.71807861328125, -71.07554626464844, 146.59364318847656, 76.69355773925781, -39.670127868652344, 532.9130249023438, 176.25958251953125, 440.422607421875, 353.8252258300781, 298.3743896484375, 288.3167724609375, 575.3091430664062, 623.622314453125, 636.3676147460938, 332.67706298828125, 117.90790557861328, 680.9697265625, 782.0068359375, 289.02008056640625], "npy": "/root/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun/margin_logs/step_0000681.npy"}
|