682 lines
1.1 MiB
682 lines
1.1 MiB
{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.02287048101425171, "std": 0.42023447155952454, "min": -1.4034271240234375, "p10": -0.46674575805664065, "median": 0.04234886169433594, "p90": 0.4323463439941407, "max": 0.89263916015625, "pos_frac": 0.53125, "sample": [-0.06523895263671875, 0.436798095703125, 0.27811431884765625, -0.9194221496582031, 0.018890380859375, 0.20587158203125, 0.18878173828125, -0.3968696594238281, 0.26206207275390625, 0.2470550537109375, -0.040912628173828125, 0.4394989013671875, -0.44133758544921875, -0.39148712158203125, 0.2764854431152344, 0.89263916015625, -0.42584991455078125, -0.46125030517578125, -0.8638992309570312, -0.3508758544921875, 0.371368408203125, 0.887847900390625, -0.382904052734375, 0.36145782470703125, -0.4890003204345703, 0.052455902099609375, -0.036136627197265625, 0.23079299926757812, 0.2469482421875, 0.1643218994140625, -0.07129669189453125, 0.2790794372558594, 0.3637123107910156, -0.8916168212890625, 0.03298759460449219, -0.2790107727050781, -0.17860984802246094, 0.23892593383789062, 0.05171012878417969, -0.2564239501953125, -0.14655303955078125, 0.27777862548828125, 0.0810394287109375, -1.4034271240234375, -0.28739166259765625, -0.1489429473876953, 0.44918060302734375, 0.1693286895751953, 0.10933303833007812, -0.14766693115234375, -0.40944671630859375, -0.18532562255859375, 0.6261310577392578, -0.20856857299804688, 0.602569580078125, 0.05538177490234375, 0.1505279541015625, 0.1313800811767578, -0.006317138671875, 0.42195892333984375, -0.29936981201171875, -0.4691009521484375, 0.16705322265625, -0.5789260864257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000001.npy"}
|
|
{"epoch": 0.0014684287812041115, "step": 2, "batch_size": 64, "mean": -0.06572240591049194, "std": 0.3523969054222107, "min": -0.9291305541992188, "p10": -0.46334152221679686, "median": -0.05502510070800781, "p90": 0.3672500610351563, "max": 1.0444793701171875, "pos_frac": 0.4375, "sample": [-0.2829437255859375, 0.3027191162109375, -0.19867706298828125, -0.3062286376953125, 0.10318756103515625, 0.20131683349609375, -0.34906005859375, 0.2802886962890625, 0.1914520263671875, -0.31072998046875, 0.08922195434570312, 0.10284614562988281, -0.03655242919921875, -0.0604095458984375, -0.06208038330078125, 0.32562255859375, -0.37982177734375, 0.2746162414550781, -0.049640655517578125, 0.3752174377441406, -0.103973388671875, 0.0699462890625, 0.36417388916015625, -0.033428192138671875, 0.37265777587890625, -0.3787078857421875, -0.6610565185546875, 0.4720420837402344, 0.47701263427734375, -0.27928924560546875, -0.44719696044921875, -0.0965118408203125, -0.7628555297851562, 0.046764373779296875, 0.06670379638671875, -0.9291305541992188, -0.7122802734375, -0.16554832458496094, 0.1485595703125, -0.07539939880371094, 0.2588920593261719, 0.039890289306640625, 0.201690673828125, 0.0623016357421875, 1.0444793701171875, -0.37696075439453125, -0.02794647216796875, -0.223297119140625, -0.35730743408203125, -0.1309051513671875, -0.3106689453125, -0.11409187316894531, -0.1669769287109375, 0.131317138671875, -0.2361297607421875, 0.4093780517578125, -0.6485977172851562, 0.36856842041015625, -0.1951904296875, -0.4702606201171875, -0.7624168395996094, 0.008928298950195312, -0.31630706787109375, 0.022550582885742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000002.npy"}
|
|
{"epoch": 0.002936857562408223, "step": 3, "batch_size": 64, "mean": -0.0063214898109436035, "std": 0.29263725876808167, "min": -0.7884864807128906, "p10": -0.4050006866455078, "median": 0.01871967315673828, "p90": 0.3696033477783204, "max": 0.6647491455078125, "pos_frac": 0.515625, "sample": [-0.25975799560546875, -0.48389434814453125, -0.42066192626953125, -0.08747100830078125, 0.07093429565429688, 0.16068649291992188, 0.3574028015136719, 0.1372814178466797, 0.05029296875, 0.19922637939453125, -0.2255096435546875, -0.20222854614257812, -0.4464111328125, -0.3383941650390625, 0.025384902954101562, 0.3994903564453125, 0.22357177734375, -0.7884864807128906, 0.2975006103515625, -0.23572158813476562, -0.33176422119140625, -0.053375244140625, 0.3748321533203125, -0.5855560302734375, -0.4741363525390625, 0.04038047790527344, -0.0794219970703125, 0.030185699462890625, 0.2866497039794922, -0.06502532958984375, 0.5228195190429688, 0.2412261962890625, 0.04646492004394531, 0.012054443359375, -0.018838882446289062, 0.48188018798828125, -0.12237548828125, -0.3912086486816406, -0.15631103515625, 0.168914794921875, -0.07980728149414062, -0.10637664794921875, 0.45513153076171875, 0.086883544921875, -0.11824798583984375, -0.052280426025390625, 0.0696258544921875, 0.08156967163085938, -0.3565559387207031, -0.41091156005859375, 0.26354026794433594, -0.09646415710449219, 0.14566612243652344, 0.6647491455078125, 0.04693031311035156, 0.6484718322753906, 0.15070343017578125, -0.025989532470703125, 0.1833038330078125, 0.1400146484375, 0.031558990478515625, -0.2490692138671875, -0.07940673828125, -0.15824508666992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000003.npy"}
|
|
{"epoch": 0.004405286343612335, "step": 4, "batch_size": 64, "mean": -0.0759580135345459, "std": 0.36372819542884827, "min": -0.869476318359375, "p10": -0.5696212768554687, "median": -0.10717391967773438, "p90": 0.45080108642578126, "max": 0.6917762756347656, "pos_frac": 0.390625, "sample": [-0.12559127807617188, -0.15245819091796875, -0.1810169219970703, -0.156707763671875, -0.7438430786132812, 0.06967544555664062, 0.3477630615234375, -0.686676025390625, -0.04104804992675781, -0.4476470947265625, 0.05824089050292969, 0.6917762756347656, 0.471405029296875, -0.18747711181640625, -0.570220947265625, 0.011486053466796875, -0.7214088439941406, 0.14379119873046875, -0.39173126220703125, 0.14043617248535156, -0.5819931030273438, -0.117462158203125, 0.1939849853515625, -0.4255218505859375, -0.01291656494140625, -0.212677001953125, 0.21015167236328125, 0.0788421630859375, -0.2456378936767578, 0.052978515625, -0.083343505859375, -0.085662841796875, -0.32323455810546875, -0.14973831176757812, -0.600067138671875, 0.5031337738037109, -0.09992218017578125, 0.2648468017578125, -0.3113365173339844, -0.0982208251953125, 0.15727996826171875, -0.12126922607421875, -0.447357177734375, 0.4513092041015625, 0.21038055419921875, 0.3154296875, -0.4470672607421875, 0.4837646484375, 0.2553272247314453, -0.47265625, 0.3073463439941406, 0.5628700256347656, 0.24791526794433594, -0.5682220458984375, -0.36545372009277344, 0.449615478515625, 0.6627197265625, -0.3406829833984375, -0.0524444580078125, -0.1144256591796875, -0.1698150634765625, -0.35148048400878906, -0.869476318359375, -0.12987136840820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000004.npy"}
|
|
{"epoch": 0.005873715124816446, "step": 5, "batch_size": 64, "mean": -0.02732786536216736, "std": 0.39857810735702515, "min": -1.950653076171875, "p10": -0.4790130615234375, "median": 0.03408527374267578, "p90": 0.4086189270019531, "max": 0.7193527221679688, "pos_frac": 0.546875, "sample": [-0.4909515380859375, 0.41156005859375, 0.7193527221679688, 0.3675537109375, 0.1884765625, -1.950653076171875, 0.2838287353515625, 0.0603485107421875, 0.03170585632324219, -0.23768234252929688, -0.122528076171875, 0.435455322265625, 0.007946014404296875, -0.14524459838867188, -0.4288597106933594, 0.10703277587890625, 0.1904296875, 0.1372241973876953, 0.0235595703125, -0.464080810546875, 0.11975288391113281, 0.40175628662109375, -0.3351097106933594, -0.154632568359375, 0.11123847961425781, -0.15714263916015625, -0.03324127197265625, -0.22548675537109375, -0.48541259765625, -0.03655242919921875, 0.1568756103515625, 0.2053508758544922, 0.427642822265625, 0.25836181640625, 0.036464691162109375, -0.0419158935546875, 0.4827728271484375, -0.30080413818359375, 0.41355133056640625, 0.1424560546875, 0.141754150390625, 0.0824432373046875, 0.11666107177734375, -0.8726043701171875, 0.294036865234375, 0.5059661865234375, 0.07204437255859375, -0.3251800537109375, -0.1342620849609375, -0.39054107666015625, 0.222564697265625, -0.20138168334960938, 0.20781707763671875, -0.5222053527832031, -0.00186920166015625, -0.173583984375, -0.202056884765625, -0.49900054931640625, -0.26360321044921875, 0.3003120422363281, -0.5909881591796875, 0.3358039855957031, -0.3206939697265625, 0.35918426513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000005.npy"}
|
|
{"epoch": 0.007342143906020558, "step": 6, "batch_size": 64, "mean": 0.027667373418807983, "std": 0.36957481503486633, "min": -0.9196224212646484, "p10": -0.496502685546875, "median": 0.07563304901123047, "p90": 0.4421979904174806, "max": 1.063995361328125, "pos_frac": 0.609375, "sample": [-0.03833770751953125, 0.12307167053222656, 0.000675201416015625, 0.0297393798828125, 0.14764785766601562, 0.5104522705078125, -0.9196224212646484, -0.5377044677734375, 0.5940093994140625, -0.2735443115234375, 0.08971786499023438, 0.06788063049316406, -0.46527099609375, 0.4574146270751953, -0.2684364318847656, -0.040313720703125, -0.0630035400390625, -0.2759532928466797, 0.2109527587890625, 0.02155303955078125, 0.004657745361328125, -0.24341201782226562, -0.416473388671875, -0.0488739013671875, 0.11568450927734375, 0.05733489990234375, 0.39391326904296875, 0.19002532958984375, 0.18288421630859375, -0.16960525512695312, -0.5098876953125, 0.6573657989501953, 0.20983123779296875, -0.07938003540039062, 0.08576202392578125, 0.601531982421875, -0.75054931640625, -0.5572166442871094, 0.2816429138183594, 0.02831268310546875, 0.36224365234375, 0.2419281005859375, 0.1853790283203125, 0.2714347839355469, 0.8223762512207031, 0.15557289123535156, 0.3389396667480469, 0.11944580078125, -0.4395904541015625, -0.2173004150390625, 0.11421966552734375, 0.26842498779296875, 0.08338546752929688, -0.05895042419433594, 0.4066925048828125, -0.1961841583251953, 0.173187255859375, -0.7134857177734375, 0.132659912109375, 0.095703125, -0.182373046875, -0.10394287109375, 1.063995361328125, -0.557525634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000006.npy"}
|
|
{"epoch": 0.00881057268722467, "step": 7, "batch_size": 64, "mean": 0.030851304531097412, "std": 0.38409531116485596, "min": -0.7110519409179688, "p10": -0.4211700439453125, "median": -0.004039764404296875, "p90": 0.5575794219970706, "max": 0.95855712890625, "pos_frac": 0.5, "sample": [-0.06478118896484375, -0.667083740234375, -0.04272651672363281, -0.17118453979492188, -0.32442474365234375, 0.3579254150390625, -0.5526237487792969, 0.461090087890625, -0.6569671630859375, 0.18210601806640625, -0.1610107421875, 0.30944061279296875, -0.01221466064453125, 0.58258056640625, -0.569976806640625, 0.6551513671875, 0.20571136474609375, -0.09366416931152344, 0.17059898376464844, 0.02761077880859375, 0.042667388916015625, 0.4704132080078125, 0.8304481506347656, -0.19561004638671875, -0.1800537109375, 0.05692291259765625, -0.12700462341308594, 0.06492424011230469, -0.3686943054199219, 0.34452056884765625, 0.045169830322265625, 0.47263336181640625, -0.00868988037109375, 0.1340789794921875, 0.34930419921875, 0.371826171875, 0.0006103515625, -0.10022163391113281, -0.38641357421875, -0.7110519409179688, -0.234619140625, -0.2928466796875, 0.09600830078125, -0.04242706298828125, 0.13776779174804688, 0.0321502685546875, 0.219329833984375, -0.3668670654296875, 0.504913330078125, -0.15348052978515625, 0.95855712890625, -0.311737060546875, -0.436065673828125, 0.9234542846679688, -0.10088348388671875, 0.29662322998046875, -0.1958160400390625, -0.0848236083984375, 0.65985107421875, 0.5801506042480469, 0.17012786865234375, -0.306121826171875, -0.32904052734375, -0.491058349609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000007.npy"}
|
|
{"epoch": 0.010279001468428781, "step": 8, "batch_size": 64, "mean": -0.010592788457870483, "std": 0.39657652378082275, "min": -1.546478271484375, "p10": -0.45051116943359376, "median": 0.023336410522460938, "p90": 0.48334197998046874, "max": 0.76129150390625, "pos_frac": 0.53125, "sample": [0.3569488525390625, 0.48175048828125, 0.27198028564453125, -0.10066604614257812, -0.43068695068359375, 0.021310806274414062, 0.5755271911621094, -0.009613037109375, 0.356292724609375, 0.23093414306640625, -0.5371780395507812, 0.05802154541015625, 0.17451095581054688, 0.2557868957519531, 0.1176910400390625, -0.134185791015625, 0.10250473022460938, 0.1966400146484375, 0.23147201538085938, 0.20780181884765625, -0.19420623779296875, -1.546478271484375, -0.1273345947265625, 0.76129150390625, -0.10384368896484375, -0.8636016845703125, 0.0075836181640625, 0.4981575012207031, 0.524017333984375, -0.374114990234375, -0.05486297607421875, -0.8019256591796875, -0.20975112915039062, 0.3148651123046875, -0.042568206787109375, -0.087799072265625, -0.151702880859375, 0.030748367309570312, 0.05193138122558594, 0.5313720703125, 0.18773651123046875, -0.25888824462890625, 0.07537841796875, -0.320343017578125, 0.10816001892089844, -0.929901123046875, -0.3537139892578125, -0.1009674072265625, 0.3343315124511719, -0.0886688232421875, -0.4572792053222656, 0.4840240478515625, -0.4723052978515625, -0.4347190856933594, 0.408294677734375, 0.027057647705078125, -0.10887908935546875, -0.22794342041015625, -0.0347442626953125, 0.20868682861328125, 0.025362014770507812, 0.160186767578125, -0.055072784423828125, 0.557647705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000008.npy"}
|
|
{"epoch": 0.011747430249632892, "step": 9, "batch_size": 64, "mean": 0.028307169675827026, "std": 0.4377601444721222, "min": -1.091064453125, "p10": -0.6275459289550781, "median": 0.03544807434082031, "p90": 0.5717700958251957, "max": 1.0733718872070312, "pos_frac": 0.546875, "sample": [0.09737396240234375, -0.18262481689453125, 0.4240226745605469, 0.4305877685546875, -0.02410888671875, -0.09162139892578125, 0.18534088134765625, 0.19037628173828125, 0.7161865234375, 0.4748039245605469, -0.4491462707519531, -0.300079345703125, -0.6085281372070312, -0.79913330078125, -0.662139892578125, -1.091064453125, 0.25028228759765625, -0.24482345581054688, -0.6356964111328125, 0.05207061767578125, -0.4473762512207031, 0.11467552185058594, -0.2551422119140625, -0.08099365234375, 0.8000259399414062, 0.38748931884765625, 0.43141937255859375, -0.1795654296875, -0.09756851196289062, 1.0733718872070312, -0.1474456787109375, -0.16823196411132812, -0.09447479248046875, 0.22222900390625, -0.1457061767578125, 0.0401611328125, -0.11308479309082031, 0.011226654052734375, 0.6133270263671875, 0.7460556030273438, 0.06109619140625, -0.0189056396484375, -0.813385009765625, 0.23270416259765625, 0.07711601257324219, 0.25595855712890625, 0.1221466064453125, -0.0600433349609375, -0.06421661376953125, -0.13931655883789062, 0.1914825439453125, 0.032375335693359375, 0.37282562255859375, 0.42954444885253906, 0.15203475952148438, 0.3812408447265625, 0.9319305419921875, 0.6358184814453125, 0.3592205047607422, 0.011444091796875, -0.6510086059570312, 0.03852081298828125, -0.9200286865234375, -0.24936676025390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000009.npy"}
|
|
{"epoch": 0.013215859030837005, "step": 10, "batch_size": 64, "mean": 0.04434826970100403, "std": 0.39100170135498047, "min": -1.0137176513671875, "p10": -0.413632583618164, "median": 0.013975143432617188, "p90": 0.5165412902832032, "max": 1.19439697265625, "pos_frac": 0.515625, "sample": [0.2727813720703125, -0.2717437744140625, -0.16520309448242188, -0.049175262451171875, -0.30748748779296875, 0.2420806884765625, 0.020654678344726562, 0.09513092041015625, 0.524169921875, 0.01922607421875, 0.03290557861328125, 0.49874114990234375, -0.4726905822753906, 0.867889404296875, -1.0137176513671875, 0.5782928466796875, -0.3519248962402344, 0.008724212646484375, -0.1551036834716797, 0.32830810546875, 0.28823089599609375, -0.093170166015625, 0.304718017578125, 0.2261066436767578, -0.155853271484375, -0.23550796508789062, -0.44672393798828125, -0.0212860107421875, 0.1473369598388672, 0.2773551940917969, -0.2162017822265625, -0.4400787353515625, -0.013957977294921875, 0.04819488525390625, -0.1666107177734375, -0.2921943664550781, -0.738861083984375, 0.5316352844238281, 0.2778453826904297, -0.06663703918457031, -0.10917854309082031, -0.3029022216796875, -0.06506919860839844, 0.39794921875, -0.256103515625, 1.0105819702148438, 0.15808868408203125, 0.36019134521484375, -0.0944671630859375, 0.22785186767578125, -0.46984100341796875, -0.2203216552734375, 0.16357421875, 0.3595008850097656, -0.10784912109375, -0.6580581665039062, 0.39745330810546875, 0.16133880615234375, 0.2517547607421875, 1.19439697265625, 0.6267929077148438, -0.1801776885986328, 0.15876007080078125, -0.08217620849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000010.npy"}
|
|
{"epoch": 0.014684287812041116, "step": 11, "batch_size": 64, "mean": 0.016193389892578125, "std": 0.40519535541534424, "min": -1.169677734375, "p10": -0.38324813842773436, "median": 0.000370025634765625, "p90": 0.5429534912109376, "max": 1.0765762329101562, "pos_frac": 0.5, "sample": [0.19642257690429688, 0.2459259033203125, 0.16986083984375, 0.19841384887695312, 0.04689788818359375, -0.0189666748046875, 0.01970672607421875, 0.10541725158691406, -0.3009452819824219, -0.3902130126953125, -0.028299331665039062, -0.22309112548828125, 0.312408447265625, 0.13663482666015625, -0.1368408203125, -0.078216552734375, 0.24445343017578125, 0.12168502807617188, 0.707611083984375, 0.7709579467773438, 0.553436279296875, 0.421051025390625, -0.4535808563232422, 0.1489734649658203, -0.24309539794921875, 0.20868682861328125, -0.43511199951171875, -0.29213714599609375, 0.053314208984375, 0.31656646728515625, -0.2634429931640625, -0.25970458984375, 0.7466259002685547, 0.7404632568359375, -0.303131103515625, 0.5159149169921875, 0.08365631103515625, 1.0765762329101562, -0.8674774169921875, -0.11023330688476562, -0.18966293334960938, -0.3022346496582031, 0.24412918090820312, -0.7927398681640625, 0.09487152099609375, -0.13856124877929688, 0.245849609375, -0.2082233428955078, -1.169677734375, -0.0240631103515625, -0.047504425048828125, -0.2043304443359375, 0.51849365234375, -0.5001373291015625, 0.3654975891113281, -0.17545318603515625, -0.3635406494140625, -0.09309768676757812, -0.25205230712890625, 0.68017578125, 0.2457122802734375, 0.05950736999511719, -0.32675743103027344, -0.36699676513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000011.npy"}
|
|
{"epoch": 0.016152716593245228, "step": 12, "batch_size": 64, "mean": -0.011764273047447205, "std": 0.39213281869888306, "min": -1.247467041015625, "p10": -0.4428184509277343, "median": -0.0026121139526367188, "p90": 0.3972772598266603, "max": 1.1016387939453125, "pos_frac": 0.5, "sample": [-0.054931640625, 0.101226806640625, 0.5074996948242188, -1.247467041015625, -0.0815582275390625, -0.09731292724609375, 0.153961181640625, 0.23241424560546875, 0.24811553955078125, 0.48799896240234375, 0.5938186645507812, -0.8863677978515625, -0.46079254150390625, 0.2689361572265625, -0.0977325439453125, -0.07403564453125, -0.08943939208984375, 0.016754150390625, 0.21479034423828125, -0.23778915405273438, -0.2808990478515625, -0.28430938720703125, -0.021392822265625, -0.8749771118164062, -0.6138229370117188, 0.18097686767578125, -0.40087890625, 0.8131561279296875, -0.286895751953125, -0.2027740478515625, -0.74774169921875, 1.1016387939453125, -0.307830810546875, 0.21657180786132812, -0.0384368896484375, 0.14559364318847656, -0.13222885131835938, -0.8183364868164062, -0.37078857421875, 0.07109451293945312, 0.08592605590820312, 0.20221710205078125, 0.09021759033203125, 0.4091224670410156, -0.16135406494140625, 0.316131591796875, -0.164642333984375, -0.0489654541015625, 0.19068241119384766, 0.16908645629882812, 0.0033588409423828125, 0.20508384704589844, 0.048583984375, 0.36963844299316406, 0.2143096923828125, -0.0367431640625, -0.1523284912109375, -0.07487869262695312, -0.020517349243164062, 0.04470634460449219, 0.4426994323730469, 0.14061355590820312, 0.3369140625, -0.00858306884765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000012.npy"}
|
|
{"epoch": 0.01762114537444934, "step": 13, "batch_size": 64, "mean": -0.047012150287628174, "std": 0.364255428314209, "min": -1.33050537109375, "p10": -0.4458404541015625, "median": -0.05298614501953125, "p90": 0.30151405334472664, "max": 1.255096435546875, "pos_frac": 0.4375, "sample": [0.1667938232421875, -0.30043601989746094, 0.14919281005859375, -0.0451812744140625, 0.1566925048828125, 0.1255340576171875, -0.02519989013671875, 0.028364181518554688, 0.32077789306640625, -0.4885749816894531, 0.2403106689453125, 0.5418930053710938, -0.30573272705078125, -0.34033966064453125, -0.3102455139160156, 1.255096435546875, -0.2371063232421875, -0.54498291015625, -0.09037017822265625, 0.331512451171875, -0.14129638671875, 0.0882720947265625, -0.22870635986328125, -0.1725788116455078, 0.2115802764892578, -0.5938262939453125, -0.07824325561523438, -0.3224163055419922, -0.11048126220703125, 0.259674072265625, -0.3221893310546875, 0.539459228515625, 0.2555999755859375, 0.2791557312011719, -1.33050537109375, -0.02823638916015625, -0.071014404296875, -0.36716270446777344, 0.2231769561767578, 0.439300537109375, -0.09713363647460938, 0.1936054229736328, -0.060791015625, -0.11131668090820312, 0.1425628662109375, 0.11996650695800781, -0.10684776306152344, 0.19369125366210938, -0.23321533203125, -0.16770172119140625, -0.25930023193359375, 0.24034881591796875, -0.10912322998046875, 0.12041854858398438, -0.7473373413085938, 0.0178375244140625, -0.01570892333984375, 0.14537811279296875, 0.31109619140625, -0.686614990234375, -0.459564208984375, -0.413818359375, 0.1171417236328125, -0.2999114990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000013.npy"}
|
|
{"epoch": 0.01908957415565345, "step": 14, "batch_size": 64, "mean": -0.009398102760314941, "std": 0.41669103503227234, "min": -1.182403564453125, "p10": -0.45866889953613277, "median": -0.011407852172851562, "p90": 0.44987030029296876, "max": 1.34564208984375, "pos_frac": 0.484375, "sample": [-0.39453887939453125, 0.22813987731933594, 0.13343429565429688, 0.5767173767089844, 0.6347808837890625, 0.29390716552734375, -0.402374267578125, -0.026702880859375, 0.44788360595703125, 0.01555633544921875, -0.46744537353515625, -0.2677497863769531, 0.9580230712890625, 0.17157745361328125, -0.007724761962890625, -0.1069793701171875, 0.004535675048828125, 0.06220245361328125, 0.07183456420898438, -0.13935089111328125, -0.035369873046875, -0.180511474609375, 0.2013092041015625, 0.365570068359375, 0.907073974609375, -0.32419586181640625, -1.182403564453125, 0.0277862548828125, 0.0834808349609375, 0.11945343017578125, -0.198577880859375, -0.15015792846679688, 0.35778045654296875, 0.4784812927246094, -0.056026458740234375, 0.3504180908203125, 0.045848846435546875, -0.25067138671875, -0.5846843719482422, 0.2467041015625, -0.36676025390625, -0.0150909423828125, -0.8471832275390625, 0.45072174072265625, -0.28383636474609375, -0.11985015869140625, 0.2253704071044922, -0.20718765258789062, -0.02410888671875, -0.4381904602050781, -0.799224853515625, 0.0096893310546875, -0.4999542236328125, -0.6620826721191406, 0.153778076171875, 1.34564208984375, 0.16765594482421875, -0.1238861083984375, -0.4366455078125, 0.04666900634765625, -0.10549354553222656, 0.05745697021484375, -0.10584259033203125, -0.03015899658203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000014.npy"}
|
|
{"epoch": 0.020558002936857563, "step": 15, "batch_size": 64, "mean": 0.03897008299827576, "std": 0.4234123229980469, "min": -1.4273605346679688, "p10": -0.41009674072265623, "median": 0.06708431243896484, "p90": 0.5243247985839845, "max": 0.800506591796875, "pos_frac": 0.546875, "sample": [0.07308006286621094, 0.800506591796875, 0.62176513671875, -0.014194488525390625, 0.5373001098632812, 0.1269359588623047, -0.05018043518066406, 0.04088592529296875, -0.14908599853515625, 0.168487548828125, -0.368865966796875, -0.07359123229980469, 0.3956298828125, -0.6740341186523438, 0.36643218994140625, -0.3999176025390625, 0.23547744750976562, -0.0148162841796875, 0.7113800048828125, 0.21675872802734375, 0.4296150207519531, -1.4273605346679688, -0.115234375, -0.124786376953125, -0.6150608062744141, -0.0408935546875, 0.308685302734375, 0.5419692993164062, -0.3723907470703125, 0.27825927734375, -0.414459228515625, -0.9023284912109375, -0.30138397216796875, -0.09383201599121094, 0.31866455078125, 0.755828857421875, -0.6724014282226562, 0.44913482666015625, 0.47637939453125, -0.29077720642089844, -0.059520721435546875, 0.2473773956298828, 0.08864212036132812, 0.11743736267089844, 0.11130523681640625, -0.11505126953125, 0.05231666564941406, -0.1323223114013672, 0.1671600341796875, 0.11122512817382812, 0.06108856201171875, 0.4639434814453125, -0.267120361328125, -0.11083221435546875, 0.4344635009765625, 0.7389678955078125, 0.494049072265625, -0.39763641357421875, 0.3258171081542969, -0.2268524169921875, 0.11156082153320312, -0.7858428955078125, 0.3981895446777344, -0.07186126708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000015.npy"}
|
|
{"epoch": 0.022026431718061675, "step": 16, "batch_size": 64, "mean": 0.09726375341415405, "std": 0.37172242999076843, "min": -0.879669189453125, "p10": -0.37824935913085933, "median": 0.10445404052734375, "p90": 0.5905632019042969, "max": 0.858795166015625, "pos_frac": 0.625, "sample": [0.19019317626953125, 0.4799957275390625, -0.31462860107421875, 0.224609375, -0.7449722290039062, -0.062267303466796875, 0.0680084228515625, -0.007617950439453125, 0.08191680908203125, 0.858795166015625, 0.1602783203125, 0.753021240234375, 0.173370361328125, 0.58026123046875, 0.047153472900390625, -0.40435791015625, 0.0972137451171875, -0.6290740966796875, 0.05621337890625, 0.34716796875, 0.6839218139648438, 0.2839241027832031, 0.3307685852050781, -0.1306610107421875, 0.2739830017089844, -0.31497955322265625, -0.0305328369140625, -0.054042816162109375, 0.10043907165527344, 0.10846900939941406, 0.24984359741210938, 0.13521194458007812, -0.176849365234375, -0.13284683227539062, -0.060821533203125, 0.2630615234375, 0.5949783325195312, -0.879669189453125, -0.4062347412109375, -0.035037994384765625, 0.23688507080078125, 0.08458709716796875, -0.031452178955078125, 0.4200439453125, 0.31804656982421875, 0.6143951416015625, -0.08119583129882812, -0.08306884765625, -0.46338653564453125, -0.285736083984375, 0.1978740692138672, 0.34958648681640625, 0.254150390625, 0.3503608703613281, -0.0137481689453125, -0.31732940673828125, 0.12434959411621094, 0.568328857421875, 0.6738739013671875, 0.6480712890625, 0.0886688232421875, 0.43294525146484375, 0.24739837646484375, -0.866973876953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000016.npy"}
|
|
{"epoch": 0.023494860499265784, "step": 17, "batch_size": 64, "mean": 0.015069544315338135, "std": 0.41791844367980957, "min": -1.1902618408203125, "p10": -0.4908855438232422, "median": 0.07535552978515625, "p90": 0.3731548309326172, "max": 1.2110748291015625, "pos_frac": 0.578125, "sample": [0.2750205993652344, 0.04689979553222656, 0.19811630249023438, 0.08203125, 0.1265411376953125, -0.23158836364746094, 0.35723876953125, -0.6027374267578125, 0.0686798095703125, -0.199554443359375, 0.5059814453125, 0.3990936279296875, -0.0925750732421875, -0.210205078125, -0.4695930480957031, 0.273712158203125, -0.4990348815917969, 0.22057533264160156, 0.29668426513671875, -0.44705963134765625, 0.103912353515625, -0.016469955444335938, 0.23983383178710938, 0.36907196044921875, -0.4172821044921875, 0.05684661865234375, 0.3749046325683594, -0.4821510314941406, 0.1087646484375, 0.1982421875, -0.49462890625, 0.3656768798828125, 0.3521881103515625, 0.03878021240234375, 0.20204925537109375, -0.0150604248046875, -0.695953369140625, 0.143341064453125, -0.1585216522216797, 0.1490478515625, 0.971710205078125, 0.10746383666992188, -0.3535785675048828, 0.19746780395507812, -1.1902618408203125, -0.47918701171875, 0.0535125732421875, -0.4983253479003906, -0.01889801025390625, -0.10223388671875, -0.07890701293945312, -0.16227340698242188, -0.1486358642578125, -0.23046112060546875, 0.114501953125, -1.146453857421875, 0.7750244140625, 0.6732254028320312, 1.2110748291015625, -0.0968017578125, 0.3616828918457031, 0.2346954345703125, 0.10565185546875, 0.14363861083984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000017.npy"}
|
|
{"epoch": 0.024963289280469897, "step": 18, "batch_size": 64, "mean": 0.06848806142807007, "std": 0.3612028658390045, "min": -1.17144775390625, "p10": -0.29725894927978513, "median": 0.016361236572265625, "p90": 0.47162742614746106, "max": 0.9110870361328125, "pos_frac": 0.53125, "sample": [0.3938484191894531, -1.17144775390625, 0.4869384765625, 0.2597198486328125, -0.23044204711914062, -0.10054779052734375, -0.2858428955078125, 0.0824432373046875, 0.020782470703125, 0.34593772888183594, -0.3329315185546875, -0.10016822814941406, -0.2928009033203125, 0.4359016418457031, -0.2514190673828125, 0.9110870361328125, 0.4050312042236328, -0.15895843505859375, 0.24576568603515625, -0.10347747802734375, -0.13997459411621094, -0.13085174560546875, 0.271575927734375, 0.0020465850830078125, 0.3640937805175781, -0.04308128356933594, 0.4889984130859375, -0.06427001953125, -0.29916954040527344, 0.10344123840332031, 0.16184234619140625, 0.3882274627685547, -0.012256622314453125, -0.20107269287109375, 0.404144287109375, -0.00164794921875, 0.41259765625, 0.2035655975341797, -0.32576751708984375, -0.2250213623046875, 0.4194526672363281, 0.01194000244140625, -0.0544586181640625, 0.63812255859375, 0.4927825927734375, 0.24649810791015625, 0.2667999267578125, -0.4084739685058594, 0.2121734619140625, 0.6689109802246094, 0.3640785217285156, -0.12675094604492188, -0.19586944580078125, -0.3070030212402344, 0.33636474609375, 0.250762939453125, 0.503326416015625, -0.16545677185058594, -0.9583740234375, 0.3740520477294922, -0.15598678588867188, -0.2587699890136719, 0.38155364990234375, -0.06927871704101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000018.npy"}
|
|
{"epoch": 0.02643171806167401, "step": 19, "batch_size": 64, "mean": 0.08037763833999634, "std": 0.358684241771698, "min": -0.6400909423828125, "p10": -0.3041576385498047, "median": 0.09328556060791016, "p90": 0.4556358337402344, "max": 1.34185791015625, "pos_frac": 0.640625, "sample": [0.3461875915527344, 0.11254310607910156, -0.3047218322753906, -0.062103271484375, 0.45740509033203125, -0.07598876953125, 0.22044754028320312, -0.09346389770507812, 0.15402793884277344, 1.34185791015625, 0.451507568359375, -0.18352508544921875, 0.5311012268066406, -0.2907257080078125, 0.3843536376953125, -0.6400909423828125, -0.5195465087890625, 0.09301376342773438, -0.29290008544921875, -0.1717987060546875, 0.53558349609375, 0.13629150390625, 0.14522933959960938, -0.4321136474609375, 0.06937599182128906, 0.3126850128173828, 0.42608642578125, 0.10906982421875, -0.24672317504882812, -0.275146484375, -0.19612884521484375, 0.3927745819091797, 0.09355735778808594, 0.04447746276855469, 0.21964263916015625, -0.2599773406982422, -0.3028411865234375, -0.16298294067382812, 0.07907867431640625, -0.31793212890625, 0.1068267822265625, 0.1011199951171875, 0.03899383544921875, -0.6284637451171875, 0.23926162719726562, 0.8798294067382812, 0.15900421142578125, 0.7327804565429688, 0.0795440673828125, 0.40476036071777344, 0.04248809814453125, 0.16030120849609375, 0.12324333190917969, 0.0086517333984375, 0.44449615478515625, 0.11481094360351562, 0.01911163330078125, -0.019054412841796875, -0.294830322265625, -0.511505126953125, 0.691619873046875, -0.1744232177734375, 0.2601776123046875, 0.33783721923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000019.npy"}
|
|
{"epoch": 0.027900146842878122, "step": 20, "batch_size": 64, "mean": 0.046672046184539795, "std": 0.37399789690971375, "min": -1.1619949340820312, "p10": -0.3291997909545898, "median": 0.08116340637207031, "p90": 0.4895580291748047, "max": 0.8569259643554688, "pos_frac": 0.59375, "sample": [0.08279609680175781, 0.1089019775390625, -0.23363113403320312, 0.52764892578125, 0.4584197998046875, 0.4888343811035156, -0.15506744384765625, -0.3878631591796875, -0.086090087890625, 0.223968505859375, -1.1619949340820312, 0.1474456787109375, 0.0471038818359375, -0.11668014526367188, 0.15121841430664062, -0.0198822021484375, -0.15126419067382812, 0.6041450500488281, -0.059230804443359375, 0.1627044677734375, 0.25689697265625, -0.9395904541015625, 0.029552459716796875, -0.28544044494628906, 0.1142730712890625, 0.052730560302734375, -0.17404937744140625, 0.11529350280761719, 0.4167060852050781, -0.1386737823486328, -0.25121307373046875, -0.5757827758789062, -0.19298553466796875, 0.08136367797851562, 0.3071022033691406, -0.0180206298828125, -0.10272216796875, -1.0747909545898438, -0.08385086059570312, -0.34795379638671875, 0.544830322265625, 0.179534912109375, 0.25811767578125, 0.09050369262695312, 0.080963134765625, 0.1985931396484375, 0.485443115234375, 0.472503662109375, 0.8569259643554688, -0.0095062255859375, -0.35407257080078125, -0.26676177978515625, 0.19797515869140625, 0.1386871337890625, 0.6629867553710938, -0.06963348388671875, 0.516143798828125, 0.33006858825683594, 0.10330009460449219, 0.0238800048828125, 0.3405799865722656, -0.171661376953125, 0.4898681640625, 0.067413330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000020.npy"}
|
|
{"epoch": 0.02936857562408223, "step": 21, "batch_size": 64, "mean": 0.1474984586238861, "std": 0.3634709119796753, "min": -0.723419189453125, "p10": -0.2752777099609375, "median": 0.1314220428466797, "p90": 0.6264558792114261, "max": 1.2214508056640625, "pos_frac": 0.671875, "sample": [0.32635498046875, 0.021070480346679688, 0.8905563354492188, 0.5196685791015625, 0.2195281982421875, 0.1618671417236328, -0.08082389831542969, -0.2817821502685547, -0.2698516845703125, 0.002655029296875, 0.857086181640625, -0.16355133056640625, -0.11597442626953125, 0.15728378295898438, 0.31781005859375, 0.7434921264648438, 0.18152999877929688, 0.1420879364013672, -0.1696949005126953, 0.1897125244140625, 0.0167236328125, 0.677276611328125, -0.10704803466796875, -0.20196914672851562, 0.13262939453125, 1.116973876953125, 0.106109619140625, -0.3267974853515625, -0.2776031494140625, -0.32053375244140625, -0.723419189453125, 0.37908935546875, 0.0729217529296875, 0.30052757263183594, -0.368011474609375, -0.147247314453125, -0.038425445556640625, 0.5401401519775391, 0.341217041015625, -0.19858551025390625, 0.40039825439453125, 0.24130630493164062, 0.34505462646484375, 0.030239105224609375, 0.019683837890625, -0.31935882568359375, 0.6634483337402344, -0.12625885009765625, 0.3321094512939453, 0.1842193603515625, 0.20565223693847656, 0.2879524230957031, 0.25458526611328125, 0.3422393798828125, 0.13021469116210938, -0.17774581909179688, 0.5116729736328125, 0.0988616943359375, 1.2214508056640625, 0.39272117614746094, -0.16469573974609375, 0.1011505126953125, 0.10162353515625, -0.2596168518066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000021.npy"}
|
|
{"epoch": 0.030837004405286344, "step": 22, "batch_size": 64, "mean": 0.1367432177066803, "std": 0.3962913155555725, "min": -0.7413253784179688, "p10": -0.3664276123046875, "median": 0.12553119659423828, "p90": 0.628402328491211, "max": 1.0591278076171875, "pos_frac": 0.65625, "sample": [0.13176727294921875, 0.381195068359375, -0.2176666259765625, 0.04533958435058594, 0.33740234375, -0.1754302978515625, 0.1895294189453125, 0.3329181671142578, 0.11929512023925781, 0.49005126953125, 0.47566986083984375, 0.2647705078125, -0.2307281494140625, 0.8960037231445312, -0.2532825469970703, -0.110198974609375, 0.11385345458984375, 0.11917304992675781, 0.0473785400390625, -0.368194580078125, 0.09317398071289062, 0.5118560791015625, -0.33477020263671875, -0.6802978515625, -0.3896636962890625, 0.16933631896972656, 0.8435134887695312, 0.3906707763671875, -0.20754241943359375, 0.812957763671875, 0.13957595825195312, 0.3671875, 0.16526031494140625, -0.11339187622070312, 0.3016624450683594, 0.04998779296875, 0.38199615478515625, -0.7413253784179688, -0.25334930419921875, 0.47734832763671875, 0.6184234619140625, 0.3037147521972656, 0.21942138671875, -0.3890724182128906, -0.04895591735839844, 0.48772430419921875, -0.3623046875, -0.562530517578125, 1.0591278076171875, 0.21588897705078125, 0.03557586669921875, 0.1549072265625, -0.23744964599609375, -0.1738567352294922, -0.431121826171875, 0.17083740234375, 0.087921142578125, 0.07336807250976562, 0.6026763916015625, 0.866729736328125, -0.040740966796875, 0.6326789855957031, 0.916015625, -0.020444869995117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000022.npy"}
|
|
{"epoch": 0.032305433186490456, "step": 23, "batch_size": 64, "mean": 0.19482776522636414, "std": 0.44403576850891113, "min": -0.6177215576171875, "p10": -0.28785400390625, "median": 0.17000198364257812, "p90": 0.6769685745239262, "max": 1.7056884765625, "pos_frac": 0.625, "sample": [-0.07014656066894531, 0.05693817138671875, 0.547393798828125, 0.0810699462890625, 0.3550224304199219, 0.3755378723144531, 0.33023834228515625, -0.25028228759765625, -0.13404273986816406, -0.2781982421875, 1.7056884765625, 0.9258956909179688, 0.344085693359375, 0.16305160522460938, 0.4906730651855469, -0.076934814453125, -0.0062808990478515625, 0.07576370239257812, 0.3538360595703125, 0.22523880004882812, -0.0371551513671875, -0.2919921875, 1.3695220947265625, 0.17695236206054688, -0.05773735046386719, -0.3694610595703125, -0.4470367431640625, -0.6177215576171875, 0.22798919677734375, -0.18310546875, -0.433685302734375, 0.4469146728515625, -0.307769775390625, 0.49596595764160156, 0.14493751525878906, -0.11614990234375, -0.1357879638671875, 0.2147369384765625, 0.024732589721679688, 0.5162124633789062, 0.3448677062988281, 0.3967781066894531, 0.2149200439453125, 0.20173263549804688, 0.05536651611328125, 1.4366607666015625, 0.2082977294921875, -0.003753662109375, 0.37813568115234375, -0.224365234375, -0.14125442504882812, 0.3909912109375, 0.4475517272949219, -0.3441028594970703, -0.04894447326660156, 0.36934661865234375, 0.762603759765625, 1.0135345458984375, -0.25838661193847656, 0.27392578125, 0.11069488525390625, -0.24169921875, 0.5671195983886719, 0.7240467071533203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000023.npy"}
|
|
{"epoch": 0.033773861967694566, "step": 24, "batch_size": 64, "mean": 0.24681302905082703, "std": 0.3852858543395996, "min": -1.05438232421875, "p10": -0.15871315002441405, "median": 0.2105693817138672, "p90": 0.676677703857422, "max": 1.017578125, "pos_frac": 0.8125, "sample": [0.819091796875, 0.22774887084960938, -0.025026321411132812, 0.09455108642578125, 0.38970947265625, 0.3717937469482422, 0.5186386108398438, -0.3398399353027344, 0.837371826171875, -0.1030120849609375, 0.5660266876220703, -0.15118026733398438, 0.15813446044921875, 0.6553421020507812, 0.5914955139160156, 0.07912826538085938, 0.3358192443847656, 0.6490898132324219, 0.6303672790527344, 0.193389892578125, 0.5692558288574219, -0.6009292602539062, -0.02081298828125, 0.10593414306640625, 0.8098602294921875, 0.4531745910644531, 0.16662979125976562, -0.5632171630859375, 0.685821533203125, 0.24640655517578125, 1.017578125, 0.4347076416015625, 0.026439666748046875, -0.26555633544921875, 0.411346435546875, -1.05438232421875, 0.0072784423828125, 0.1649017333984375, -0.486785888671875, 0.8258590698242188, 0.520538330078125, 0.12945556640625, -0.10779953002929688, -0.1619415283203125, 0.5302352905273438, 0.7685699462890625, 0.4233856201171875, 0.034759521484375, 0.60699462890625, 0.06989097595214844, 0.46099853515625, 0.05549430847167969, 0.16562652587890625, 0.1550579071044922, 0.0512237548828125, 0.545684814453125, 0.4118328094482422, 0.05966949462890625, 0.4324302673339844, 0.10976409912109375, 0.1558990478515625, 0.0264739990234375, 0.283721923828125, 0.6359176635742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000024.npy"}
|
|
{"epoch": 0.03524229074889868, "step": 25, "batch_size": 64, "mean": 0.18760085105895996, "std": 0.43097376823425293, "min": -0.7965469360351562, "p10": -0.26451873779296875, "median": 0.16068649291992188, "p90": 0.7383563995361329, "max": 1.2985687255859375, "pos_frac": 0.671875, "sample": [0.12804794311523438, 0.23276138305664062, 0.812713623046875, 0.28350067138671875, -0.15913772583007812, -0.01641082763671875, -0.7965469360351562, 0.7387237548828125, -0.234710693359375, 0.09479713439941406, -0.062652587890625, 1.13702392578125, 0.019969940185546875, 0.7374992370605469, 1.2985687255859375, -0.20354270935058594, -0.48266029357910156, 0.44549560546875, -0.15159988403320312, 0.16452789306640625, -0.756683349609375, 0.1047821044921875, 0.3458576202392578, 0.41656494140625, 0.0524749755859375, 0.5882492065429688, 0.1568450927734375, 0.70672607421875, -0.5544586181640625, -0.11836051940917969, 0.20113372802734375, 0.22026824951171875, -0.41257476806640625, 0.34706878662109375, 0.2742156982421875, -0.1481781005859375, -0.11646842956542969, 1.28704833984375, -0.151397705078125, 0.33989715576171875, -0.3827705383300781, 0.5273056030273438, 0.9083175659179688, 0.12749099731445312, 0.033061981201171875, -0.23712158203125, 0.16898345947265625, -0.082855224609375, 0.7791748046875, 0.4086265563964844, 0.154876708984375, 0.08495330810546875, 0.20885086059570312, 0.1884918212890625, 0.6402816772460938, -0.01061248779296875, 0.0061187744140625, 0.6921272277832031, 0.3874053955078125, -0.004108428955078125, 0.2913932800292969, 0.39542388916015625, -0.2762603759765625, 0.2279205322265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000025.npy"}
|
|
{"epoch": 0.03671071953010279, "step": 26, "batch_size": 64, "mean": 0.31715625524520874, "std": 0.5526702404022217, "min": -1.0053253173828125, "p10": -0.32676544189453116, "median": 0.3045196533203125, "p90": 1.0309844970703128, "max": 2.0223388671875, "pos_frac": 0.71875, "sample": [0.113128662109375, 0.014377593994140625, 0.460693359375, -0.12035369873046875, 0.77655029296875, 0.47946929931640625, 0.0391845703125, 0.46823883056640625, 0.16300201416015625, -0.10274887084960938, 0.555694580078125, 0.04736328125, 2.0223388671875, 0.46804046630859375, 0.0314483642578125, 0.842254638671875, 0.640228271484375, -0.22142791748046875, 0.609588623046875, -0.37191009521484375, 0.43927764892578125, -0.12311935424804688, 0.2072906494140625, 0.071136474609375, 0.04207611083984375, 1.3531112670898438, 0.8749923706054688, -0.553558349609375, 0.33710479736328125, 1.282806396484375, -0.48699951171875, -0.41898345947265625, 0.5139675140380859, 0.19086647033691406, -0.12320899963378906, -0.004852294921875, 1.2952117919921875, -0.1016845703125, 0.1168212890625, 1.7333297729492188, 0.305633544921875, 0.01271820068359375, 1.2339706420898438, 0.323577880859375, 0.37346649169921875, 0.6611099243164062, -1.0053253173828125, 0.7004241943359375, -0.12446403503417969, -0.11997032165527344, -0.1392364501953125, 0.19121551513671875, -0.4105339050292969, 0.464691162109375, 0.4080047607421875, 0.62451171875, 0.547760009765625, 1.0691757202148438, 0.9418716430664062, -0.4925880432128906, 0.30340576171875, -0.12099075317382812, 0.5562324523925781, 0.43259239196777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000026.npy"}
|
|
{"epoch": 0.0381791483113069, "step": 27, "batch_size": 64, "mean": 0.47805216908454895, "std": 0.5398308634757996, "min": -0.45752525329589844, "p10": -0.10123405456542968, "median": 0.39064788818359375, "p90": 1.0535186767578126, "max": 2.9734344482421875, "pos_frac": 0.859375, "sample": [0.3744697570800781, 0.6298332214355469, 0.6958694458007812, 0.027984619140625, 0.6280479431152344, 1.257659912109375, 0.142181396484375, 0.312225341796875, 1.226318359375, -0.45752525329589844, 0.7053928375244141, 0.3829193115234375, 0.40216827392578125, 0.23453521728515625, 0.91754150390625, 0.49668121337890625, 2.9734344482421875, 0.5941238403320312, 0.42498016357421875, 0.8998298645019531, 0.17241287231445312, 0.0204620361328125, 0.5248031616210938, -0.3114738464355469, 0.37218475341796875, 0.3603248596191406, 0.3461723327636719, 0.1545581817626953, 0.6294212341308594, 1.8933181762695312, 0.07790374755859375, 0.08979988098144531, -0.14921188354492188, 0.975311279296875, 0.26476287841796875, 0.0355224609375, 0.6416473388671875, 0.5415191650390625, -0.08802413940429688, 0.4095802307128906, -0.3013343811035156, 0.4989166259765625, -0.150421142578125, 0.8932571411132812, 0.07563400268554688, 1.264068603515625, -0.16461944580078125, 0.2936515808105469, 1.1271209716796875, 0.7782974243164062, 1.0570526123046875, 0.1056365966796875, 0.6535415649414062, 0.91778564453125, 0.7979583740234375, 0.41764068603515625, 1.0452728271484375, -0.01180267333984375, 0.34255027770996094, -0.10689544677734375, 0.39837646484375, 0.22625732421875, 0.3167266845703125, 0.2910003662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000027.npy"}
|
|
{"epoch": 0.039647577092511016, "step": 28, "batch_size": 64, "mean": 0.3269844055175781, "std": 0.5473117828369141, "min": -1.1264495849609375, "p10": -0.23368549346923828, "median": 0.3130359649658203, "p90": 0.929747200012207, "max": 1.8195648193359375, "pos_frac": 0.6875, "sample": [0.5857925415039062, -0.001708984375, 0.16159820556640625, 0.206787109375, 0.9269313812255859, 0.2210693359375, 0.24153709411621094, 0.1654052734375, -0.2325439453125, -0.0023403167724609375, -0.17584228515625, 1.02142333984375, 1.8195648193359375, 0.6037750244140625, 0.23374176025390625, -0.1685047149658203, 0.1132659912109375, 1.6129913330078125, -0.047901153564453125, 0.02283477783203125, 0.48603057861328125, -0.37549591064453125, -0.08786773681640625, 0.5328369140625, 0.01104736328125, 0.7650146484375, 0.5304794311523438, 0.548492431640625, 1.4155616760253906, 0.4675025939941406, 0.3031654357910156, 0.72845458984375, 0.09131240844726562, -0.15814208984375, 0.3289756774902344, 0.0015659332275390625, 0.7048721313476562, 0.64306640625, 0.686676025390625, -0.23967361450195312, 0.41849517822265625, 0.5850391387939453, -0.08180046081542969, 0.9309539794921875, 1.2599639892578125, 0.4232635498046875, 0.5107955932617188, -0.2341747283935547, 0.9159584045410156, 0.39160919189453125, 1.1056060791015625, -1.0530242919921875, -0.23793792724609375, 0.322906494140625, -0.17375946044921875, 0.4518775939941406, -0.08287811279296875, 0.8892822265625, 0.920684814453125, -0.14557266235351562, 0.7374496459960938, -1.1264495849609375, -0.4029998779296875, -0.09003829956054688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000028.npy"}
|
|
{"epoch": 0.041116005873715125, "step": 29, "batch_size": 64, "mean": 0.5435110330581665, "std": 0.47907721996307373, "min": -0.2703857421875, "p10": -0.05721931457519531, "median": 0.4859962463378906, "p90": 1.1236518859863283, "max": 1.654754638671875, "pos_frac": 0.859375, "sample": [1.0502395629882812, 0.03613853454589844, 1.4118499755859375, 0.40850830078125, 0.40686798095703125, 0.4542236328125, 0.196441650390625, 0.44173431396484375, -0.059017181396484375, 0.1060028076171875, -0.0530242919921875, 0.05158042907714844, 1.1742630004882812, 0.6664810180664062, -0.2703857421875, 0.8603057861328125, 0.8216552734375, 0.09227752685546875, 0.13301849365234375, 1.0702896118164062, -0.236419677734375, 0.9146804809570312, 0.9024620056152344, 0.2775096893310547, 1.3866195678710938, 0.8463363647460938, 0.1355133056640625, 0.69732666015625, 0.707916259765625, 0.620819091796875, 0.05397796630859375, 1.0380783081054688, 0.9883041381835938, 0.1738739013671875, 0.40185546875, 0.2219867706298828, -0.16268157958984375, 0.13527679443359375, 1.0994644165039062, 0.90570068359375, 0.871856689453125, 0.9801559448242188, 1.1340179443359375, 0.2855072021484375, 0.7523593902587891, -0.039459228515625, 1.0289154052734375, -0.0693206787109375, 0.8375968933105469, -0.13879966735839844, 0.023403167724609375, 0.028638839721679688, 1.4545440673828125, 0.4683074951171875, 0.6846961975097656, 1.654754638671875, 0.3289031982421875, 0.3986663818359375, 1.0158653259277344, 0.5656089782714844, 1.1815338134765625, 0.5036849975585938, -0.14714813232421875, 0.8723640441894531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000029.npy"}
|
|
{"epoch": 0.042584434654919234, "step": 30, "batch_size": 64, "mean": 0.7230584621429443, "std": 0.7328934073448181, "min": -0.6118450164794922, "p10": -0.0256057739257812, "median": 0.5879249572753906, "p90": 1.5614387512207033, "max": 3.876251220703125, "pos_frac": 0.890625, "sample": [0.3845176696777344, 0.2635536193847656, 0.07767677307128906, 0.601348876953125, 1.2273712158203125, 0.04061126708984375, 0.061100006103515625, -0.3961181640625, -0.0469970703125, 0.8810272216796875, 0.5745010375976562, 0.969329833984375, 1.5156326293945312, 1.2464866638183594, 1.14593505859375, 1.506805419921875, 0.8122482299804688, 1.8477020263671875, 0.4006805419921875, 1.792510986328125, 0.3214569091796875, 1.036468505859375, 3.876251220703125, 0.5130386352539062, 0.8560333251953125, 0.3034934997558594, 0.8386993408203125, 0.298614501953125, -0.3193073272705078, 0.26077842712402344, 0.4870452880859375, 0.21959304809570312, 0.75274658203125, 0.21504783630371094, 0.23157882690429688, 0.498077392578125, 2.1826934814453125, 1.76251220703125, 1.492767333984375, 0.9364032745361328, 0.0243072509765625, 1.314422607421875, 0.8279399871826172, 0.6727371215820312, -0.4527435302734375, 0.915283203125, 1.5810699462890625, 0.15038108825683594, 0.9537887573242188, 1.1121673583984375, 1.483978271484375, 1.1379776000976562, 0.3411540985107422, -0.6118450164794922, 0.08387184143066406, 0.8296279907226562, 0.4936981201171875, -0.053279876708984375, 0.91973876953125, -0.07549858093261719, 0.2212066650390625, 1.842529296875, 0.5493698120117188, 0.34394264221191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000030.npy"}
|
|
{"epoch": 0.04405286343612335, "step": 31, "batch_size": 64, "mean": 0.5276015996932983, "std": 0.7071222066879272, "min": -1.0256805419921875, "p10": -0.1848886489868164, "median": 0.4151649475097656, "p90": 1.5833168029785158, "max": 2.940093994140625, "pos_frac": 0.78125, "sample": [0.3182411193847656, 1.427703857421875, -0.18590545654296875, 0.5142173767089844, 2.040283203125, 0.6020965576171875, 0.00701904296875, 0.09102630615234375, 0.08607864379882812, -0.3568878173828125, 2.940093994140625, 0.034912109375, -0.0241851806640625, 0.30795860290527344, -0.09017181396484375, 0.1771411895751953, 0.90386962890625, 0.4360542297363281, 1.3033294677734375, 0.7772712707519531, -0.17089080810546875, 0.06509780883789062, 0.13835906982421875, 0.35826873779296875, -0.19287872314453125, 0.3592414855957031, 0.591156005859375, 0.4707603454589844, 0.5934200286865234, 0.1453857421875, 1.625762939453125, 0.7523574829101562, 2.1017303466796875, 1.2422027587890625, -0.0054302215576171875, -1.0256805419921875, 0.139007568359375, 0.6290664672851562, -0.17429351806640625, 1.211517333984375, 0.93072509765625, 0.6501388549804688, -0.2117767333984375, 0.2517280578613281, -0.0552978515625, -0.57427978515625, 0.42090606689453125, 0.5308456420898438, 0.1659088134765625, -0.18251609802246094, 0.2623176574707031, 1.6089401245117188, 1.7441558837890625, 1.0493927001953125, -0.22910308837890625, 0.5398483276367188, 0.7239532470703125, 0.8177642822265625, 0.9296073913574219, 0.09682846069335938, 1.523529052734375, 1.7419548034667969, 0.409423828125, 0.4571990966796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000031.npy"}
|
|
{"epoch": 0.04552129221732746, "step": 32, "batch_size": 64, "mean": 0.7771281003952026, "std": 0.9538834095001221, "min": -0.47658538818359375, "p10": -0.05105133056640621, "median": 0.5420560836791992, "p90": 2.123696899414063, "max": 4.82568359375, "pos_frac": 0.859375, "sample": [0.9959259033203125, 0.3302040100097656, 1.0192413330078125, 1.0769882202148438, 0.8199424743652344, 0.7337799072265625, -0.27393341064453125, 0.980682373046875, 0.5524959564208984, 0.6914596557617188, 1.4624710083007812, 0.7009925842285156, 0.15567779541015625, 0.09993362426757812, 0.02197265625, 1.3996505737304688, -0.000972747802734375, 0.95635986328125, 0.08842849731445312, 2.7681427001953125, 0.23863983154296875, 1.45135498046875, 2.1896514892578125, 0.088226318359375, 0.9182052612304688, 0.5241775512695312, 0.061248779296875, 0.24793243408203125, -0.17617416381835938, 1.0587539672851562, 0.21556854248046875, 0.05178642272949219, 0.2982921600341797, 3.305450439453125, 1.4681320190429688, 0.6487407684326172, 1.11468505859375, 0.3124237060546875, -0.06660842895507812, 0.04695892333984375, 2.3912124633789062, 1.011220932006836, -0.20952606201171875, -0.014751434326171875, 4.82568359375, 0.5316162109375, 0.03543853759765625, 2.1880035400390625, 2.5827178955078125, -0.47658538818359375, -0.26285362243652344, 0.132110595703125, 1.9736480712890625, 0.4666900634765625, -0.4429664611816406, 0.048488616943359375, 1.2413253784179688, 1.360382080078125, 0.32300567626953125, 0.804931640625, 0.24015235900878906, 1.0026321411132812, 1.162750244140625, 0.24398040771484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000032.npy"}
|
|
{"epoch": 0.04698972099853157, "step": 33, "batch_size": 64, "mean": 0.6502406597137451, "std": 0.6424391865730286, "min": -0.5984420776367188, "p10": -0.060972595214843564, "median": 0.6674442291259766, "p90": 1.379373931884766, "max": 2.779571533203125, "pos_frac": 0.890625, "sample": [-0.24915313720703125, 0.4920635223388672, 0.8399677276611328, 0.753387451171875, 2.037750244140625, -0.13872146606445312, 0.44989013671875, 0.235809326171875, 0.1460590362548828, 0.81451416015625, 1.1784286499023438, 1.2390518188476562, 1.2015380859375, 0.7401580810546875, 1.6524505615234375, 0.5310211181640625, 0.844512939453125, -0.5593643188476562, 0.6754913330078125, -0.4715118408203125, 0.1312103271484375, 1.4083480834960938, 0.9441413879394531, 1.1463851928710938, 0.8568038940429688, 0.198486328125, 1.5921173095703125, 0.14862823486328125, 0.1996307373046875, 2.779571533203125, 0.17977142333984375, 0.1916961669921875, 1.921051025390625, 0.1531829833984375, 0.3746986389160156, 0.6593971252441406, 0.20462799072265625, 0.7693290710449219, 0.8638839721679688, 0.7999191284179688, 0.4795722961425781, 1.2891311645507812, 0.14617919921875, 0.12044143676757812, 0.6944046020507812, 1.0026779174804688, 0.2164764404296875, 1.80426025390625, 0.5185222625732422, -0.5984420776367188, 0.72991943359375, -0.3558540344238281, 0.6945343017578125, 0.38907623291015625, 1.311767578125, 0.800994873046875, 1.0286788940429688, -0.576690673828125, 0.6872024536132812, 0.6527023315429688, 0.40756988525390625, 0.441436767578125, 0.5739517211914062, 1.2206649780273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000033.npy"}
|
|
{"epoch": 0.048458149779735685, "step": 34, "batch_size": 64, "mean": 0.746705949306488, "std": 0.8102946877479553, "min": -0.6964111328125, "p10": -0.1118879318237304, "median": 0.5267505645751953, "p90": 1.9621887207031254, "max": 2.7895660400390625, "pos_frac": 0.84375, "sample": [2.2600555419921875, -0.382476806640625, 0.0338134765625, 0.17436981201171875, 2.7895660400390625, 0.22679519653320312, 1.836151123046875, 0.1152191162109375, 0.2958984375, 0.21121978759765625, 0.5993423461914062, -0.04025077819824219, 0.7791061401367188, 0.04611968994140625, 1.2389678955078125, 2.6451568603515625, 0.40437889099121094, -0.0302581787109375, 1.8288421630859375, 0.352386474609375, 0.9916000366210938, -0.34340667724609375, 1.1016788482666016, 1.3715934753417969, 0.767303466796875, 1.2220535278320312, 1.5850830078125, -0.3909149169921875, 0.4646759033203125, 1.05535888671875, 0.3616790771484375, 0.2365264892578125, 1.4599800109863281, 1.2305450439453125, 2.499114990234375, 1.1837387084960938, 0.7829151153564453, 2.016204833984375, 0.11853408813476562, -0.14258956909179688, 0.5471763610839844, 0.4322090148925781, 0.8033866882324219, 1.3935165405273438, 0.5063247680664062, 0.7366256713867188, 0.04695701599121094, 0.2702674865722656, 1.2300567626953125, -0.17953109741210938, 0.4559974670410156, -0.6964111328125, 0.9442825317382812, 0.5979461669921875, 0.260711669921875, -0.14369964599609375, 1.6713638305664062, 0.21090316772460938, 0.50103759765625, 0.6701431274414062, 2.1377105712890625, 2.41534423828125, 0.05348014831542969, -0.032695770263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000034.npy"}
|
|
{"epoch": 0.049926578560939794, "step": 35, "batch_size": 64, "mean": 1.1969325542449951, "std": 1.0398361682891846, "min": -0.39159393310546875, "p10": 0.24703006744384767, "median": 0.8615331649780273, "p90": 2.5184783935546875, "max": 5.392669677734375, "pos_frac": 0.9375, "sample": [0.9136810302734375, 0.5919227600097656, 2.534637451171875, 1.7511711120605469, 0.27216339111328125, 0.7329120635986328, 0.44903564453125, -0.39159393310546875, -0.35486602783203125, 1.2593841552734375, 2.0475616455078125, 0.2452259063720703, 0.8866043090820312, 0.8664302825927734, 0.8566360473632812, 0.45723724365234375, 1.8353538513183594, 3.3847885131835938, 0.585723876953125, 0.5452785491943359, 0.7119560241699219, 2.1163330078125, 3.7198486328125, 0.5039901733398438, 0.6290512084960938, 1.3246383666992188, 0.5266895294189453, 1.4467315673828125, 0.42350006103515625, 0.8121795654296875, 5.392669677734375, 1.4181709289550781, 1.2644805908203125, 1.5477828979492188, 0.4375591278076172, 3.1197357177734375, 0.2512397766113281, 1.59246826171875, 2.1557769775390625, 1.7052459716796875, 0.6746559143066406, 2.48077392578125, 0.7834396362304688, 1.868408203125, 0.5668754577636719, 0.0865936279296875, 1.522705078125, 2.7360992431640625, 0.8537063598632812, 0.4889984130859375, 1.0793037414550781, 0.1898193359375, 0.7211170196533203, 1.8010711669921875, 0.9594573974609375, 0.8452854156494141, 1.6186065673828125, -0.20038223266601562, 1.327850341796875, -0.3899078369140625, 0.7706146240234375, 3.0047836303710938, 1.6714935302734375, 0.5729751586914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000035.npy"}
|
|
{"epoch": 0.0513950073421439, "step": 36, "batch_size": 64, "mean": 1.0765833854675293, "std": 1.2660794258117676, "min": -2.039825439453125, "p10": -0.1757331848144531, "median": 0.8190517425537109, "p90": 2.945435333251954, "max": 4.1096649169921875, "pos_frac": 0.796875, "sample": [1.2470016479492188, -0.2464580535888672, 0.8471717834472656, 0.534332275390625, 1.1215667724609375, 0.6744594573974609, 0.4143524169921875, 0.07220458984375, -0.00981903076171875, 2.6346588134765625, 1.9031906127929688, -0.12758445739746094, -0.18674468994140625, 0.42926597595214844, 0.12749481201171875, 1.1361618041992188, -0.32384681701660156, 1.402313232421875, 1.6745147705078125, 0.245758056640625, -0.0168609619140625, -0.3453083038330078, -0.1500396728515625, 0.3146839141845703, 3.7512664794921875, 3.0732765197753906, 1.3279953002929688, -0.6160964965820312, 3.0111083984375, 0.0812530517578125, 0.7633266448974609, 1.266448974609375, -0.2580604553222656, 3.4254150390625, -0.06280136108398438, 0.08790969848632812, 3.73492431640625, 0.131500244140625, 2.7111053466796875, 0.22112274169921875, 2.7921981811523438, 1.0425224304199219, 2.683258056640625, 1.1128349304199219, 2.566558837890625, 0.8632850646972656, -0.016178131103515625, 4.1096649169921875, 0.7909317016601562, -2.039825439453125, 1.924530029296875, 1.0083847045898438, 2.3319625854492188, 0.32218170166015625, 2.1575164794921875, 1.612142562866211, 0.306549072265625, 0.6573562622070312, 2.62005615234375, 0.31793212890625, 0.9768753051757812, 3.379486083984375, 1.0265579223632812, 0.33238983154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000036.npy"}
|
|
{"epoch": 0.05286343612334802, "step": 37, "batch_size": 64, "mean": 0.9973729848861694, "std": 1.3546226024627686, "min": -1.63958740234375, "p10": -0.1776153564453124, "median": 0.567108154296875, "p90": 2.824113464355469, "max": 6.561920166015625, "pos_frac": 0.828125, "sample": [1.771148681640625, 0.09513282775878906, 1.3722076416015625, 1.5762176513671875, -0.21652984619140625, 0.14624786376953125, -0.37265777587890625, 0.8405418395996094, 1.2815780639648438, 0.08441162109375, 0.151397705078125, 2.82421875, 0.5826950073242188, -0.39111328125, -0.5906829833984375, 0.32269287109375, 0.8065814971923828, 0.23133277893066406, 1.9109249114990234, 6.561920166015625, 0.7534866333007812, 4.8064727783203125, 1.2612838745117188, 1.0320663452148438, 1.2748031616210938, 1.4491729736328125, 0.5515213012695312, 0.2530555725097656, 1.2029342651367188, 0.5291728973388672, 0.3574485778808594, -0.0721435546875, 0.42620849609375, -1.0369644165039062, 0.3907470703125, 0.289337158203125, 0.4066276550292969, 1.9856414794921875, 2.1691818237304688, 3.168243408203125, -0.016994476318359375, 2.8781509399414062, 0.4447021484375, 1.2469329833984375, 0.14479827880859375, 0.5400276184082031, -0.08681488037109375, -0.639190673828125, 0.5925827026367188, 1.5365447998046875, 1.8402786254882812, 0.01959991455078125, 2.1740798950195312, 3.4232254028320312, 0.33983612060546875, 2.8238677978515625, 1.59307861328125, -1.63958740234375, -0.06830024719238281, 0.9882888793945312, 1.97747802734375, 3.2366180419921875, 0.08591842651367188, 0.21018409729003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000037.npy"}
|
|
{"epoch": 0.05433186490455213, "step": 38, "batch_size": 64, "mean": 1.3659417629241943, "std": 1.6389647722244263, "min": -1.14990234375, "p10": 0.014782333374023543, "median": 0.7885608673095703, "p90": 3.2000808715820335, "max": 8.080780029296875, "pos_frac": 0.890625, "sample": [0.6635169982910156, 8.080780029296875, 4.6068115234375, 1.9689979553222656, 0.22835540771484375, 0.12179946899414062, 0.9399642944335938, 0.3187713623046875, 1.2687454223632812, 0.77935791015625, 1.5674476623535156, 0.6243095397949219, 0.3441925048828125, 2.6112823486328125, 1.2030258178710938, -0.24592018127441406, 0.36954498291015625, 0.37017822265625, -0.3471660614013672, 1.756927490234375, 0.8768539428710938, 1.3774871826171875, 2.5783767700195312, 0.15166854858398438, 3.4215545654296875, 0.2665576934814453, 2.2142257690429688, 2.4918441772460938, 1.6430549621582031, 0.12809371948242188, 0.6522617340087891, 2.6998291015625, -1.0324554443359375, 0.7930679321289062, 0.6639461517333984, 0.3870086669921875, 0.5740966796875, 5.8039398193359375, 0.7773017883300781, 0.7197189331054688, 3.4144744873046875, 5.61309814453125, 1.9157867431640625, 0.7840538024902344, 0.6161899566650391, 0.157928466796875, -0.0310821533203125, 0.334716796875, 0.7182693481445312, 1.4850997924804688, -1.14990234375, 0.6238975524902344, -0.0604400634765625, 1.4867286682128906, 1.3578205108642578, 1.828704833984375, 1.5310897827148438, 5.1415863037109375, 1.8444976806640625, 0.35773468017578125, 1.6680145263671875, -0.346893310546875, 1.3699493408203125, 2.339588165283203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000038.npy"}
|
|
{"epoch": 0.055800293685756244, "step": 39, "batch_size": 64, "mean": 1.5928758382797241, "std": 1.4390220642089844, "min": -3.077972412109375, "p10": 0.159661865234375, "median": 1.3081684112548828, "p90": 3.6072647094726564, "max": 5.1002655029296875, "pos_frac": 0.9375, "sample": [2.3711471557617188, 2.3833465576171875, 5.1002655029296875, 2.007232666015625, 1.4590301513671875, -0.7244720458984375, 1.9925918579101562, 2.503589630126953, 1.058206558227539, 2.176607131958008, 0.4338226318359375, 3.1937484741210938, 1.7240447998046875, 0.99615478515625, 3.61083984375, 0.943359375, 0.5213966369628906, 0.023822784423828125, 0.152099609375, 0.7901382446289062, 0.8360366821289062, 1.5935821533203125, 2.3125457763671875, 1.1271076202392578, 0.7700176239013672, 2.09686279296875, -0.221435546875, 1.5769405364990234, 4.54693603515625, 0.17730712890625, 0.84735107421875, 0.4602947235107422, 3.2364578247070312, 2.0514373779296875, -3.077972412109375, 1.9178581237792969, 0.48328590393066406, 2.2504348754882812, 3.8771018981933594, 4.4530487060546875, 0.9497394561767578, 1.8200225830078125, 3.9138870239257812, 0.84368896484375, 1.2179908752441406, 0.49642181396484375, -0.38555908203125, 0.7885818481445312, 1.398345947265625, 1.0406856536865234, 0.84185791015625, 3.505084991455078, 3.4199066162109375, 2.62664794921875, 0.6049289703369141, 4.143898010253906, 3.5989227294921875, 0.9130954742431641, 1.8572578430175781, 0.5944919586181641, 0.5456352233886719, 0.07883453369140625, 0.8395538330078125, 2.2579593658447266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000039.npy"}
|
|
{"epoch": 0.05726872246696035, "step": 40, "batch_size": 64, "mean": 1.4550960063934326, "std": 1.6168969869613647, "min": -0.796173095703125, "p10": -0.15202865600585933, "median": 1.1399431228637695, "p90": 3.8512977600097664, "max": 7.0899810791015625, "pos_frac": 0.796875, "sample": [-0.0041961669921875, -0.171600341796875, 0.9000473022460938, 1.1478424072265625, 1.7231674194335938, 0.40460205078125, 0.453948974609375, 0.3822784423828125, 0.24963951110839844, 2.3627243041992188, 5.5780792236328125, 3.9507293701171875, 0.4897727966308594, 3.5570335388183594, 1.6430511474609375, -0.3286476135253906, 1.7451705932617188, 2.260345458984375, 0.29067039489746094, -0.796173095703125, 2.7915077209472656, -0.1794910430908203, 0.867767333984375, -0.0429229736328125, 2.341644287109375, 1.4640045166015625, 0.2652740478515625, 1.1320438385009766, 0.2735137939453125, 0.020477294921875, 3.943878173828125, -0.063934326171875, 3.2657241821289062, -0.4401092529296875, 0.3569774627685547, 4.519287109375, -0.10636138916015625, 3.6352767944335938, 0.6914749145507812, 1.159799575805664, 4.614284515380859, 0.7538967132568359, 0.7725715637207031, 7.0899810791015625, -0.25807952880859375, 1.7203750610351562, -0.29681396484375, 1.664703369140625, 2.6889190673828125, 2.0080032348632812, 1.157257080078125, 2.345592498779297, 3.1494216918945312, 1.5474910736083984, 0.8373031616210938, 1.4275131225585938, 4.5256500244140625, -0.1024322509765625, 1.5690231323242188, 0.3752899169921875, -0.00429534912109375, 1.6073341369628906, 1.7033615112304688, 0.4954719543457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000040.npy"}
|
|
{"epoch": 0.05873715124816446, "step": 41, "batch_size": 64, "mean": 1.8601820468902588, "std": 1.9838576316833496, "min": -3.1455230712890625, "p10": 0.17854251861572268, "median": 1.3451805114746094, "p90": 4.399060058593751, "max": 8.156150817871094, "pos_frac": 0.921875, "sample": [0.7600860595703125, 0.6204605102539062, 3.444580078125, 1.1609878540039062, 1.1115875244140625, 2.1210098266601562, 3.1347198486328125, 2.0017242431640625, -0.7457733154296875, 3.6581192016601562, 5.7264404296875, 0.8606796264648438, 2.614288330078125, 8.156150817871094, 0.20485687255859375, 0.4119415283203125, 1.08642578125, 1.4584579467773438, 0.40570831298828125, -0.792755126953125, 1.4449882507324219, 3.3947792053222656, 4.5734710693359375, 1.0725078582763672, 6.8910064697265625, 1.4473381042480469, 4.251617431640625, 4.462249755859375, 0.7181377410888672, 0.2870979309082031, 2.2901153564453125, 0.14923095703125, 6.57989501953125, 0.5816879272460938, 0.9088363647460938, 3.3261795043945312, 1.5010757446289062, 2.63946533203125, 0.64715576171875, 2.6401748657226562, 3.9508056640625, 1.5170211791992188, -3.1455230712890625, 3.851348876953125, 0.6175155639648438, 0.5323486328125, 2.1852035522460938, 1.7238578796386719, 2.365224838256836, 0.5721855163574219, 1.346649169921875, 1.3437118530273438, 0.9392547607421875, 0.844879150390625, 0.5970306396484375, 1.2918968200683594, 0.32247161865234375, 3.25054931640625, 0.40691375732421875, 0.1672649383544922, -0.3264427185058594, -0.26717185974121094, 1.8956222534179688, 5.8623199462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000041.npy"}
|
|
{"epoch": 0.06020558002936858, "step": 42, "batch_size": 64, "mean": 2.4012622833251953, "std": 2.264364719390869, "min": -1.7234725952148438, "p10": 0.14524459838867199, "median": 1.9027366638183594, "p90": 5.38767547607422, "max": 10.277374267578125, "pos_frac": 0.90625, "sample": [2.1001663208007812, -0.31462860107421875, 1.7999210357666016, 9.008941650390625, 10.277374267578125, 2.5292587280273438, 0.2439117431640625, 6.35498046875, 2.099254608154297, 3.0672149658203125, 1.5278472900390625, 5.944740295410156, 2.6706008911132812, 7.149749755859375, 0.8226165771484375, 2.5368804931640625, 3.007537841796875, 0.10295867919921875, 2.6710433959960938, 3.1967697143554688, 0.9283828735351562, 1.1222801208496094, 3.48785400390625, 0.7785110473632812, 0.95635986328125, 2.3324356079101562, -0.3743438720703125, 2.38702392578125, 1.8348274230957031, -0.2699432373046875, 1.9706459045410156, 5.4720458984375, 0.35210418701171875, 5.555328369140625, 2.67803955078125, 1.4669036865234375, 0.7678871154785156, 1.5419559478759766, 1.604705810546875, -0.35558319091796875, 4.1780242919921875, -0.102386474609375, 1.592620849609375, 5.1908111572265625, 1.3199348449707031, 4.362541198730469, 0.7113990783691406, 0.2873802185058594, 1.3081245422363281, 1.3293914794921875, 0.8432807922363281, 5.063720703125, -1.7234725952148438, 0.5830059051513672, 2.1158981323242188, 3.26177978515625, 0.4300041198730469, 2.128082275390625, 4.3185272216796875, 1.1564483642578125, 3.9297637939453125, 5.125633239746094, 0.9566249847412109, 4.279083251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000042.npy"}
|
|
{"epoch": 0.06167400881057269, "step": 43, "batch_size": 64, "mean": 2.2915761470794678, "std": 1.8442620038986206, "min": -1.3382720947265625, "p10": 0.20471076965332036, "median": 2.2409238815307617, "p90": 4.2679183959960945, "max": 9.910430908203125, "pos_frac": 0.953125, "sample": [0.17993545532226562, 0.2793388366699219, 0.04212379455566406, 3.0245819091796875, 3.8369293212890625, 1.1729583740234375, 3.1352767944335938, 2.0756683349609375, 2.4247207641601562, 6.244384765625, 0.84954833984375, 2.090494155883789, 0.6218185424804688, 1.8914566040039062, 2.8049583435058594, 0.6611747741699219, 3.2745513916015625, 4.906486511230469, 1.9125442504882812, 0.1100616455078125, 5.8448486328125, 2.9197235107421875, 4.715629577636719, 0.9420890808105469, 2.5420150756835938, 2.2569656372070312, 0.4594459533691406, 2.238210678100586, -0.0892181396484375, 4.3333740234375, 1.0724258422851562, 2.414926528930664, 4.1151885986328125, 2.717041015625, 3.1447601318359375, 3.3616905212402344, 0.7147903442382812, 2.2436370849609375, 1.5338554382324219, 0.26251983642578125, 1.2592926025390625, 1.8533973693847656, 5.473670959472656, 3.3400344848632812, 0.9420394897460938, -1.3382720947265625, 0.8331813812255859, 2.9290313720703125, 2.4869842529296875, 3.13623046875, 3.0084190368652344, 3.4650039672851562, 2.0988006591796875, 0.1757984161376953, 2.0932159423828125, 9.910430908203125, 3.722412109375, 1.3335742950439453, -0.13690757751464844, 0.5988121032714844, 3.1652069091796875, 4.072071075439453, 2.6092910766601562, 0.3462200164794922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000043.npy"}
|
|
{"epoch": 0.0631424375917768, "step": 44, "batch_size": 64, "mean": 2.59177565574646, "std": 2.016450881958008, "min": -0.8762359619140625, "p10": 0.38601455688476566, "median": 2.179595947265625, "p90": 5.080787658691406, "max": 9.62158203125, "pos_frac": 0.953125, "sample": [3.7275161743164062, 2.5944671630859375, 1.6561145782470703, 0.5362453460693359, 6.453006744384766, 2.4590072631835938, 2.3112716674804688, 1.53155517578125, 1.7818260192871094, 4.9842071533203125, 5.938667297363281, 0.6170539855957031, 4.184028625488281, 5.0717620849609375, 4.694427490234375, 3.115447998046875, 0.8129081726074219, 1.96044921875, 1.9695167541503906, 5.0089569091796875, 0.2933483123779297, 2.6399993896484375, 2.5745697021484375, 6.73895263671875, 2.1693572998046875, 2.9775390625, 0.42157745361328125, 1.4966354370117188, 5.08465576171875, 1.7102489471435547, 2.4789352416992188, -0.2145843505859375, 1.6813850402832031, 1.3010025024414062, 2.0637550354003906, 2.9074859619140625, 1.1730575561523438, 1.3952102661132812, 4.4058380126953125, 1.4979248046875, 1.263580322265625, 2.5147552490234375, 3.7124176025390625, 3.3727874755859375, 1.2836036682128906, 3.2575531005859375, 3.16265869140625, 2.155609130859375, 0.3707733154296875, 0.2785682678222656, 5.4033660888671875, 3.7871017456054688, 2.1898345947265625, 3.5688400268554688, 1.32330322265625, 9.62158203125, 0.6666374206542969, -0.5729217529296875, 3.3179397583007812, 1.1030006408691406, -0.8762359619140625, 0.2302532196044922, 1.0339508056640625, 7.499351501464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000044.npy"}
|
|
{"epoch": 0.06461086637298091, "step": 45, "batch_size": 64, "mean": 2.0580525398254395, "std": 2.3884527683258057, "min": -1.5616302490234375, "p10": -0.14740734100341796, "median": 1.5436782836914062, "p90": 5.3272441864013675, "max": 11.2528076171875, "pos_frac": 0.84375, "sample": [3.1046524047851562, 1.011962890625, -0.0214996337890625, 2.1346473693847656, 0.18279647827148438, 1.4576549530029297, 2.715160369873047, 6.28839111328125, 3.9791259765625, 2.9083328247070312, 0.6134185791015625, 7.134590148925781, 3.1790390014648438, 0.36995697021484375, 0.00685882568359375, 1.7134475708007812, -0.1494598388671875, 1.683074951171875, 0.08579254150390625, 5.156497955322266, 0.06158447265625, 0.4638671875, 2.7192230224609375, 1.0085487365722656, 0.6035842895507812, 4.764617919921875, 6.135795593261719, 4.392608642578125, 0.20955657958984375, 0.5912628173828125, 4.492034912109375, 2.1043319702148438, 4.9505615234375, -0.293182373046875, -0.672698974609375, 1.5665130615234375, 6.4234771728515625, 3.3507003784179688, -0.042755126953125, 1.5697402954101562, 1.29693603515625, 0.2271728515625, -1.1969223022460938, -0.9599761962890625, 1.520843505859375, 11.2528076171875, 0.9487991333007812, 6.2930755615234375, 3.1636734008789062, -0.6758041381835938, 2.392333984375, 0.24912261962890625, 1.8039989471435547, 0.4456787109375, 1.3539161682128906, 5.400421142578125, 0.9219207763671875, 2.1238861083984375, 2.168153762817383, 2.006561279296875, -0.14261817932128906, 4.042087554931641, 0.6871185302734375, -1.5616302490234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000045.npy"}
|
|
{"epoch": 0.06607929515418502, "step": 46, "batch_size": 64, "mean": 2.683544158935547, "std": 2.872194766998291, "min": -2.4341888427734375, "p10": -0.03670501708984372, "median": 1.8838224411010742, "p90": 6.296036911010743, "max": 13.736114501953125, "pos_frac": 0.875, "sample": [0.6534500122070312, 0.16007232666015625, 5.4079742431640625, 6.82208251953125, 3.903533935546875, -2.4341888427734375, 1.4024887084960938, 1.6474590301513672, 6.412384033203125, 3.469125747680664, 6.378353118896484, 1.3703804016113281, 0.6748580932617188, 2.3860416412353516, 4.32635498046875, 0.3746070861816406, -0.6426239013671875, 0.7053070068359375, 3.7569198608398438, 10.354736328125, -0.6335697174072266, 0.168304443359375, 0.14716720581054688, 3.5892868041992188, 0.40076446533203125, 4.192359924316406, 3.7990570068359375, 1.0782546997070312, 4.0302734375, 2.3488292694091797, 3.5874252319335938, 0.2877655029296875, -1.014251708984375, 0.6239356994628906, 2.5388946533203125, 5.587726593017578, 5.059410095214844, 0.233673095703125, 7.01715087890625, -0.0489959716796875, 0.3671092987060547, 2.4862213134765625, 5.0970001220703125, -0.008026123046875, 13.736114501953125, 0.2815399169921875, 5.7935333251953125, 5.4744415283203125, 0.7355422973632812, 2.4919891357421875, 1.7243309020996094, 7.4520111083984375, 2.972412109375, -0.1126556396484375, 1.6361656188964844, 6.103965759277344, 1.1144790649414062, 2.043313980102539, 2.8827896118164062, 5.5449676513671875, 1.3697128295898438, 0.9201297760009766, 1.6157073974609375, -0.0987396240234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000046.npy"}
|
|
{"epoch": 0.06754772393538913, "step": 47, "batch_size": 64, "mean": 2.6440374851226807, "std": 2.436188220977783, "min": -2.0778274536132812, "p10": -0.3144378662109374, "median": 2.044175148010254, "p90": 5.9337810516357425, "max": 9.037277221679688, "pos_frac": 0.875, "sample": [2.0107269287109375, 2.8916969299316406, 0.7004318237304688, 1.4367828369140625, 4.626644134521484, 1.8734378814697266, 1.2434234619140625, 1.68170166015625, 6.170848846435547, 2.594707489013672, -0.4102325439453125, 5.838764190673828, 5.77117919921875, 1.1662483215332031, 3.4341812133789062, 2.172576904296875, -0.4951019287109375, 1.4191436767578125, 1.61907958984375, 4.828239440917969, 6.338218688964844, 1.3417816162109375, 0.6892929077148438, 2.817819595336914, 3.1964492797851562, 1.003072738647461, -0.8628311157226562, 5.9745025634765625, 7.43927001953125, 2.363433837890625, -0.36870574951171875, 2.9457454681396484, 5.625637054443359, 3.8297271728515625, -2.0778274536132812, 0.2804737091064453, 0.240264892578125, 1.2977638244628906, 0.7446441650390625, 8.74786376953125, 4.98248291015625, 1.9846458435058594, 0.9895172119140625, 2.55023193359375, 8.3883056640625, 4.994384765625, -0.18781280517578125, 3.459646224975586, 9.037277221679688, -1.2849197387695312, 1.3301162719726562, 1.89019775390625, -0.428253173828125, 1.3579158782958984, 2.4763641357421875, 1.233154296875, 5.222541809082031, 1.5460433959960938, 4.363006591796875, 1.9827499389648438, 3.451171875, 3.3071136474609375, 2.0776233673095703, 2.353811264038086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000047.npy"}
|
|
{"epoch": 0.06901615271659324, "step": 48, "batch_size": 64, "mean": 2.618363618850708, "std": 2.697575092315674, "min": -6.274169921875, "p10": 0.04299640655517581, "median": 1.9798402786254883, "p90": 6.479444885253907, "max": 10.422393798828125, "pos_frac": 0.921875, "sample": [3.62164306640625, 0.06938552856445312, 6.5425567626953125, 7.346382141113281, 5.86767578125, 2.7173843383789062, -0.3670196533203125, 2.1086044311523438, 5.794342041015625, 3.8012466430664062, 1.244873046875, 4.390777587890625, 1.4231109619140625, 0.6008148193359375, -0.03228759765625, 1.221099853515625, 6.332183837890625, 0.03168678283691406, 1.4495925903320312, 0.029964447021484375, 0.2915077209472656, 8.024215698242188, -0.30505943298339844, 1.873779296875, 5.407218933105469, 1.5107192993164062, 2.4865875244140625, 3.365468978881836, 0.37664031982421875, 3.061185836791992, 1.8073158264160156, 1.5336074829101562, 3.660369873046875, 1.669790267944336, 7.530364990234375, 3.478504180908203, 4.298866271972656, 8.174140930175781, 2.93084716796875, 1.2791290283203125, 0.336181640625, 1.6781425476074219, 0.6991348266601562, 2.6858978271484375, 0.1956787109375, 3.3679447174072266, 2.845155715942383, 1.6022796630859375, -1.5492897033691406, 7.2345123291015625, 2.0859012603759766, 2.96038818359375, 1.7535552978515625, 10.422393798828125, 3.6663894653320312, -6.274169921875, 4.630802154541016, 1.4327621459960938, 0.776519775390625, 0.5687713623046875, 4.704559326171875, 1.6186065673828125, 2.086688995361328, 1.397247314453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000048.npy"}
|
|
{"epoch": 0.07048458149779736, "step": 49, "batch_size": 64, "mean": 3.106010675430298, "std": 3.1618149280548096, "min": -3.00469970703125, "p10": 0.08932342529296888, "median": 2.67901611328125, "p90": 7.0016098022460955, "max": 12.861572265625, "pos_frac": 0.90625, "sample": [2.6644821166992188, 0.6275634765625, 0.2225360870361328, 3.1815032958984375, 2.800924301147461, 4.661121368408203, 0.5832366943359375, 1.9867210388183594, -0.9292449951171875, 0.62567138671875, 0.24828147888183594, 1.5381546020507812, 1.8750457763671875, 7.582313537597656, 6.262992858886719, 2.7088470458984375, 3.5463294982910156, 1.6248207092285156, 2.5038223266601562, 1.1536102294921875, 2.179107666015625, -2.592184066772461, 3.05377197265625, 0.03223228454589844, 10.81707763671875, 8.347000122070312, 0.5868988037109375, 5.275138854980469, 3.89569091796875, 0.3891105651855469, 6.625205993652344, 12.861572265625, 3.9819564819335938, 0.9897613525390625, -1.2103500366210938, -3.00469970703125, 3.1585960388183594, 3.350025177001953, 3.8369903564453125, 4.689746856689453, 2.518625259399414, -0.12350845336914062, 0.261810302734375, 4.0593414306640625, 4.370506286621094, 11.59136962890625, 2.5185813903808594, 1.832122802734375, 7.162925720214844, 6.2316436767578125, 4.8246612548828125, 2.6935501098632812, 4.55242919921875, 4.381309509277344, 10.767333984375, 0.41550445556640625, -0.697906494140625, 2.5496749877929688, 1.4276599884033203, 4.007164001464844, 4.340972900390625, 1.7870712280273438, 3.983489990234375, 0.5969696044921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000049.npy"}
|
|
{"epoch": 0.07195301027900147, "step": 50, "batch_size": 64, "mean": 3.4672958850860596, "std": 3.172093629837036, "min": -2.098276138305664, "p10": -0.3283157348632812, "median": 2.6837759017944336, "p90": 7.828643035888673, "max": 11.830322265625, "pos_frac": 0.84375, "sample": [6.118438720703125, -0.42040252685546875, -0.28179931640625, 8.312702178955078, 1.0746994018554688, 2.22637939453125, 6.263160705566406, 1.5500984191894531, 3.704761505126953, 1.7718839645385742, -0.3482513427734375, 6.560089111328125, 2.719451904296875, 11.5499267578125, 0.8858184814453125, 2.445068359375, 6.0536956787109375, 2.648099899291992, 8.51153564453125, 3.0682373046875, 0.6090259552001953, 4.5501861572265625, 7.652900695800781, 2.2680511474609375, 2.094165802001953, 4.329006195068359, -0.4041175842285156, -0.44062042236328125, 2.9274673461914062, 7.903961181640625, 5.607086181640625, 4.7144317626953125, 8.57071304321289, 11.830322265625, -2.098276138305664, 0.5778484344482422, 1.15985107421875, 1.9690933227539062, 0.1652545928955078, 3.1579513549804688, 2.2338027954101562, 1.962982177734375, 5.832763671875, 2.9759140014648438, 6.489727020263672, 6.190704345703125, -0.17104339599609375, 1.7848548889160156, 4.6627349853515625, -0.10736083984375, 5.8458251953125, 2.5570526123046875, 4.277622222900391, 5.004127502441406, 2.376110076904297, 5.142967224121094, 6.46405029296875, 2.270294189453125, 10.071388244628906, -0.7724800109863281, 5.7305145263671875, 1.1872024536132812, -1.7078704833984375, 0.04716300964355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000050.npy"}
|
|
{"epoch": 0.07342143906020558, "step": 51, "batch_size": 64, "mean": 3.4574432373046875, "std": 4.225892066955566, "min": -4.7260894775390625, "p10": -0.5537408828735351, "median": 2.3147716522216797, "p90": 8.899594879150392, "max": 16.56494140625, "pos_frac": 0.8125, "sample": [8.433036804199219, 4.033821105957031, 1.1139850616455078, -0.4462471008300781, 0.891754150390625, 0.384735107421875, 7.78387451171875, -0.8133144378662109, 1.63525390625, -0.0571441650390625, 9.311767578125, 6.775768280029297, 1.9082374572753906, 3.749786376953125, 2.029470443725586, 4.090484619140625, 3.1354446411132812, -0.5998096466064453, 8.663909912109375, 5.159908294677734, 3.7125282287597656, -1.034912109375, 0.031948089599609375, 4.016387939453125, 1.96160888671875, 6.813358306884766, 0.6442489624023438, 4.819217681884766, 0.8887519836425781, 4.520851135253906, 16.56494140625, 2.476245880126953, 0.8954505920410156, 4.770408630371094, 2.8694324493408203, 12.327987670898438, 1.2663192749023438, 13.145462036132812, 4.8486175537109375, 1.8034687042236328, 2.1532974243164062, 1.2331619262695312, 3.271881103515625, 9.000602722167969, 1.1904106140136719, 0.24634933471679688, 5.525871276855469, -3.3704833984375, -0.3130207061767578, 14.812179565429688, 12.748245239257812, 6.2922515869140625, 6.369556427001953, -0.6370468139648438, 1.1324481964111328, 5.662864685058594, 0.1133270263671875, -0.05405426025390625, 2.1391754150390625, -4.7260894775390625, 2.749664306640625, -1.23480224609375, 2.5347366333007812, -0.09121322631835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000051.npy"}
|
|
{"epoch": 0.07488986784140969, "step": 52, "batch_size": 64, "mean": 5.342073440551758, "std": 4.270158290863037, "min": -2.2415924072265625, "p10": 0.6730659484863284, "median": 4.45838737487793, "p90": 11.044255828857423, "max": 18.782638549804688, "pos_frac": 0.953125, "sample": [1.4699058532714844, 11.164192199707031, 4.462562561035156, 10.764404296875, 6.995216369628906, 8.786972045898438, 2.36419677734375, 6.32122802734375, 8.20611572265625, 12.08563232421875, 0.3324737548828125, 11.26324462890625, 5.171352386474609, 5.065666198730469, 7.324594497680664, 7.604581832885742, 2.3317337036132812, 4.253852844238281, 15.017105102539062, 8.391799926757812, 3.2185497283935547, 0.9737777709960938, 10.292648315429688, 2.4985408782958984, 0.05667686462402344, 6.353721618652344, 2.4054031372070312, 1.6717472076416016, 8.016836166381836, 18.782638549804688, 8.28912353515625, 2.0989856719970703, 9.221229553222656, 4.243061065673828, 3.917064666748047, 11.914886474609375, 4.161388397216797, 0.544189453125, 4.071434020996094, -2.0187606811523438, -2.2415924072265625, 1.9023208618164062, 6.625343322753906, 5.566764831542969, 1.7009963989257812, 8.038314819335938, 4.454212188720703, 10.05780029296875, 6.418743133544922, 1.604879379272461, 0.2956390380859375, 3.222503662109375, 2.8903541564941406, 4.484245300292969, 2.9068832397460938, 8.941539764404297, 4.972553253173828, 1.3052444458007812, 15.689437866210938, -0.7672576904296875, 5.479408264160156, 4.359920501708984, 2.735492706298828, 1.1590023040771484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000052.npy"}
|
|
{"epoch": 0.0763582966226138, "step": 53, "batch_size": 64, "mean": 5.282422065734863, "std": 5.726108074188232, "min": -3.1252670288085938, "p10": 0.2155189514160159, "median": 3.555267333984375, "p90": 13.283502960205078, "max": 24.554946899414062, "pos_frac": 0.90625, "sample": [-1.2458305358886719, 2.8748779296875, 3.0995864868164062, 1.509368896484375, 0.9586944580078125, 3.5723190307617188, 11.708984375, -0.32977294921875, 7.144538879394531, -1.622467041015625, 1.7731857299804688, 24.526611328125, -2.2942428588867188, 0.6493988037109375, 6.0348358154296875, 6.788951873779297, 2.155487060546875, 9.318801879882812, 2.2791671752929688, 11.124160766601562, 4.207523345947266, 4.5419158935546875, 11.242401123046875, 6.265071868896484, 13.789909362792969, 0.8279571533203125, 4.6675262451171875, 0.5057525634765625, 5.057735443115234, 1.632223129272461, 16.470489501953125, 7.439510345458984, 6.303127288818359, 8.099090576171875, 3.4083518981933594, 0.09113311767578125, 0.8756561279296875, 3.7122154235839844, 1.1455001831054688, 4.269746780395508, 3.1815032958984375, 6.782899856567383, 1.5660686492919922, 13.045585632324219, -3.1252670288085938, 16.067001342773438, 16.1153564453125, 3.5382156372070312, 3.908416748046875, 2.937490463256836, 1.985769271850586, 3.74774169921875, 2.4104232788085938, 0.87872314453125, 3.4529647827148438, 4.150058746337891, 2.8480052947998047, 24.554946899414062, 2.227588653564453, 12.23919677734375, 4.435878753662109, 3.2766494750976562, 13.385467529296875, -0.11518096923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000053.npy"}
|
|
{"epoch": 0.07782672540381791, "step": 54, "batch_size": 64, "mean": 4.68695068359375, "std": 4.391002178192139, "min": -0.9171085357666016, "p10": 0.4863510131835938, "median": 3.727235794067383, "p90": 10.527245330810548, "max": 19.796707153320312, "pos_frac": 0.9375, "sample": [6.206634521484375, 4.5679931640625, 7.9571533203125, 0.6313095092773438, 0.7806835174560547, 3.0215530395507812, 1.0548477172851562, 2.2219104766845703, 5.3105010986328125, 7.710605621337891, 1.0287284851074219, 4.593345642089844, 10.590103149414062, 1.959066390991211, 3.651355743408203, 10.380577087402344, -0.37940406799316406, 4.1916351318359375, 4.97089958190918, 5.300331115722656, -0.17218780517578125, 1.1333236694335938, 14.592720031738281, 0.7683258056640625, 6.831260681152344, 0.45357513427734375, 4.316310882568359, 4.5063629150390625, 5.384925842285156, 5.578765869140625, 1.4474124908447266, 0.0836944580078125, 7.066169738769531, -0.20058822631835938, 0.6688365936279297, 3.2072677612304688, 11.583045959472656, 3.2788543701171875, 4.751190185546875, 12.413749694824219, 1.3858203887939453, 2.0055007934570312, 19.796707153320312, 9.8756103515625, -0.9171085357666016, 2.5343170166015625, 0.14381790161132812, 11.564559936523438, 0.5628280639648438, 3.8031158447265625, 1.39404296875, 4.3717193603515625, 9.189506530761719, 1.3123397827148438, 2.54052734375, 15.841583251953125, 1.2464447021484375, 4.087833404541016, 3.0759658813476562, 10.123016357421875, 2.228565216064453, 9.84783935546875, 1.0555706024169922, 9.451881408691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000054.npy"}
|
|
{"epoch": 0.07929515418502203, "step": 55, "batch_size": 64, "mean": 5.594723701477051, "std": 6.283792495727539, "min": -9.2359619140625, "p10": -1.0808685302734373, "median": 5.042801856994629, "p90": 14.160139465332032, "max": 24.687164306640625, "pos_frac": 0.828125, "sample": [7.104835510253906, 8.290130615234375, 5.148384094238281, -3.5238800048828125, 20.868988037109375, -1.9232177734375, 6.758319854736328, 3.752086639404297, 14.349502563476562, 0.21209716796875, 5.733222961425781, 5.600101470947266, 1.5887680053710938, 9.83367919921875, 17.27435302734375, 8.500045776367188, 4.101615905761719, 8.517402648925781, 24.687164306640625, 3.6086082458496094, 10.484634399414062, -1.4562530517578125, 4.023979187011719, 7.157722473144531, 21.531448364257812, -0.12837982177734375, 13.8868408203125, 0.515350341796875, 0.36980628967285156, 7.016849517822266, 3.9897689819335938, -1.477569580078125, 5.133430480957031, 0.24317169189453125, 9.991363525390625, 4.123710632324219, -1.1470108032226562, 0.3438568115234375, 0.6917781829833984, -4.192924499511719, 8.661724090576172, 2.556612014770508, 1.85540771484375, 13.288505554199219, 6.4734954833984375, -0.9265365600585938, 1.167337417602539, 9.2353515625, 4.952173233032227, -9.2359619140625, 2.4984493255615234, 14.277267456054688, 6.092315673828125, 3.4688758850097656, 6.560405731201172, 5.142240524291992, -0.7726421356201172, 8.241485595703125, 6.753135681152344, 12.356880187988281, 4.7299346923828125, -0.21563720703125, 4.680107116699219, 14.637588500976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000055.npy"}
|
|
{"epoch": 0.08076358296622614, "step": 56, "batch_size": 64, "mean": 5.401305675506592, "std": 6.240213394165039, "min": -8.274402618408203, "p10": -1.285307693481445, "median": 5.024249076843262, "p90": 13.52677154541016, "max": 23.052413940429688, "pos_frac": 0.796875, "sample": [-0.894775390625, 6.329704284667969, -3.18389892578125, 7.581382751464844, -0.3791389465332031, -8.274402618408203, 9.371391296386719, -1.4526786804199219, 3.2925643920898438, -3.2775115966796875, 3.1656875610351562, 2.055583953857422, 5.459739685058594, 13.954681396484375, 2.178396224975586, -0.1470489501953125, -0.38501739501953125, 9.882522583007812, 9.18198013305664, 7.390625, -2.1727218627929688, 9.716842651367188, 2.8251571655273438, 20.2235107421875, 7.9698638916015625, 13.83990478515625, 1.1342334747314453, 0.384613037109375, 8.44906234741211, 2.493558883666992, 6.6365509033203125, 6.986396789550781, 8.304832458496094, 1.5541763305664062, 9.795295715332031, 10.894779205322266, 0.4507865905761719, 17.075592041015625, 6.927577972412109, 6.040962219238281, 5.319330215454102, 4.421543121337891, 2.9780826568603516, 17.64276123046875, 5.973564147949219, 11.971328735351562, 5.729541778564453, 10.28369140625, 0.5908737182617188, 23.052413940429688, 1.9586029052734375, -0.15682601928710938, 3.8636741638183594, 12.796127319335938, -6.1269378662109375, 2.8462352752685547, -0.5472564697265625, 19.733078002929688, 4.729167938232422, -2.615631103515625, 3.7771224975585938, 1.9891853332519531, 5.7949371337890625, 8.298187255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000056.npy"}
|
|
{"epoch": 0.08223201174743025, "step": 57, "batch_size": 64, "mean": 5.9908952713012695, "std": 5.517789363861084, "min": -8.17169189453125, "p10": 0.06918010711669925, "median": 5.874476432800293, "p90": 13.257407760620117, "max": 20.555191040039062, "pos_frac": 0.90625, "sample": [7.528026580810547, 3.3008785247802734, 0.10302734375, -1.3163299560546875, 9.198089599609375, -8.17169189453125, 10.337295532226562, 0.3446521759033203, 13.13714599609375, 6.216053009033203, 14.70745849609375, 3.361848831176758, 20.555191040039062, 2.9178924560546875, 6.494209289550781, 5.2714996337890625, -1.1713829040527344, 2.5698318481445312, 6.1651153564453125, 18.660964965820312, 10.5634765625, 14.227813720703125, 10.37451171875, 3.6066665649414062, 6.039863586425781, 1.3224201202392578, 13.308948516845703, 5.709089279174805, 3.7193775177001953, 2.1890411376953125, 0.8462066650390625, 4.109804153442383, 2.9964523315429688, 0.05467414855957031, 6.456634521484375, 6.56146240234375, -0.1719512939453125, 1.2735137939453125, 2.6796188354492188, 8.055381774902344, 10.190338134765625, 1.9456615447998047, 9.010322570800781, 4.964881896972656, 7.413177490234375, -2.26702880859375, 8.614349365234375, 6.353862762451172, 9.25494384765625, 3.169870376586914, 2.387605667114258, 15.732192993164062, 4.9757232666015625, 2.6106338500976562, 8.063217163085938, 4.2883758544921875, 0.9016780853271484, 11.307220458984375, 10.276786804199219, -5.2220458984375, 12.670578002929688, 17.602920532226562, 7.02302360534668, 8.016227722167969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000057.npy"}
|
|
{"epoch": 0.08370044052863436, "step": 58, "batch_size": 64, "mean": 6.841357231140137, "std": 7.0269622802734375, "min": -7.783966064453125, "p10": -0.14983768463134756, "median": 5.293179512023926, "p90": 16.892082977294933, "max": 25.17742919921875, "pos_frac": 0.875, "sample": [14.053573608398438, -1.3966808319091797, 0.6813087463378906, 4.406871795654297, 3.6741104125976562, 3.796537399291992, 18.758018493652344, 3.2460250854492188, 13.260627746582031, 2.8306732177734375, 5.684436798095703, 7.0430145263671875, 7.4449005126953125, 3.740978240966797, 2.1091537475585938, 12.841110229492188, 9.978927612304688, 8.900043487548828, 4.901922225952148, 2.789308547973633, 6.989967346191406, 13.911617279052734, 13.265754699707031, -1.2629852294921875, 11.119247436523438, 2.22735595703125, 8.472953796386719, 6.53892707824707, 21.054771423339844, 3.953001022338867, 25.17742919921875, 7.687244415283203, 6.080600738525391, -0.1931781768798828, 14.439353942871094, 1.0703754425048828, 13.290916442871094, 3.4192657470703125, 1.9995651245117188, -1.773681640625, 1.7152976989746094, 2.5392532348632812, 21.312240600585938, -1.9584274291992188, 9.860565185546875, -0.048709869384765625, 1.1014747619628906, 6.76556396484375, -4.61773681640625, 21.933929443359375, 1.9952468872070312, 5.98419189453125, 0.8616352081298828, 13.572589874267578, 10.66207504272461, -7.783966064453125, 21.959442138671875, 1.511627197265625, 2.805206298828125, 6.89837646484375, 13.960273742675781, 1.6928424835205078, 0.9672470092773438, 17.943252563476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000058.npy"}
|
|
{"epoch": 0.08516886930983847, "step": 59, "batch_size": 64, "mean": 7.2461676597595215, "std": 7.300617218017578, "min": -6.076690673828125, "p10": -0.433093643188476, "median": 6.3211517333984375, "p90": 18.495296478271488, "max": 30.740615844726562, "pos_frac": 0.890625, "sample": [8.125591278076172, 7.9040679931640625, 14.445068359375, 19.02667236328125, 15.469947814941406, 3.890869140625, 30.740615844726562, 19.559112548828125, 6.935672760009766, 15.028350830078125, 0.9504203796386719, 7.835361480712891, -3.2321128845214844, 2.2710704803466797, 20.290008544921875, -0.6845512390136719, 2.2743043899536133, 13.524795532226562, 6.34893798828125, 18.872573852539062, 0.8730697631835938, 2.0139923095703125, 1.71502685546875, 1.2579765319824219, 3.4227981567382812, 22.137725830078125, 2.3172779083251953, 1.9368000030517578, 7.514019012451172, -6.076690673828125, 9.350746154785156, -1.8889884948730469, 7.217643737792969, 7.303398132324219, 8.936241149902344, 17.61498260498047, 2.4818572998046875, 11.640335083007812, 6.81829833984375, 3.613861083984375, -5.231971740722656, -2.646575927734375, 5.62397575378418, 0.1536407470703125, 3.9578609466552734, 9.648406982421875, 7.348600387573242, 2.719451904296875, 7.314674377441406, 2.5456886291503906, 2.319011688232422, 6.094268798828125, 2.995016098022461, 8.865825653076172, 5.440692901611328, 13.591033935546875, 9.198688507080078, 3.745006561279297, 2.681976318359375, -1.953125, 16.485816955566406, 17.354171752929688, 6.293365478515625, 19.43206787109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000059.npy"}
|
|
{"epoch": 0.08663729809104258, "step": 60, "batch_size": 64, "mean": 5.918500900268555, "std": 6.486839294433594, "min": -13.40045166015625, "p10": -1.2171134948730467, "median": 6.338956832885742, "p90": 12.138288879394532, "max": 24.89936065673828, "pos_frac": 0.8125, "sample": [7.357627868652344, -2.7562103271484375, 1.542245864868164, 11.600959777832031, 0.6374778747558594, 3.207683563232422, 10.967620849609375, 7.699211120605469, 1.5421295166015625, -13.40045166015625, 6.586397171020508, -1.028564453125, 1.9801559448242188, 4.071788787841797, 10.746337890625, 6.08538818359375, 5.32269287109375, 10.173324584960938, 3.7440032958984375, 9.7618408203125, 13.306175231933594, 11.831085205078125, 17.149322509765625, 8.802734375, 8.198944091796875, 7.697257995605469, -0.5550689697265625, 6.075408935546875, 9.859512329101562, -0.027177810668945312, 8.85525894165039, 17.733856201171875, 4.272941589355469, 6.2181243896484375, 7.221965789794922, -1.5331573486328125, 10.02728271484375, 8.933258056640625, 1.5339508056640625, -4.9857330322265625, 11.848007202148438, 2.8887977600097656, 5.555091857910156, 7.969200134277344, 3.6063613891601562, -0.902923583984375, -9.575645446777344, 15.565841674804688, 18.682296752929688, 11.014678955078125, 12.2626953125, 2.579132080078125, -1.8188285827636719, -1.2979202270507812, 10.901872634887695, 6.459789276123047, 24.89936065673828, 8.391525268554688, 1.8313217163085938, 9.118215560913086, -1.0222244262695312, 7.8137359619140625, 4.468107223510742, 1.0879573822021484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000060.npy"}
|
|
{"epoch": 0.0881057268722467, "step": 61, "batch_size": 64, "mean": 7.234993934631348, "std": 9.665848731994629, "min": -10.121078491210938, "p10": -1.0125953674316406, "median": 5.258674621582031, "p90": 16.99438934326172, "max": 54.455108642578125, "pos_frac": 0.828125, "sample": [2.597055435180664, 1.9797630310058594, -0.9979782104492188, 2.3847198486328125, 13.497146606445312, 0.07498359680175781, 0.5705909729003906, 7.312076568603516, -0.8020401000976562, 26.378997802734375, 20.565902709960938, 18.272674560546875, 25.645263671875, 12.769142150878906, 7.810066223144531, 9.252742767333984, -1.4332275390625, 1.8917865753173828, 11.660655975341797, 11.035316467285156, 3.3927230834960938, 5.305728912353516, 15.93487548828125, 8.260944366455078, 6.087913513183594, 6.811574935913086, 5.211620330810547, 5.1347198486328125, 8.293228149414062, 8.55078125, 0.056884765625, -0.1553955078125, 13.123603820800781, 16.472442626953125, 15.4803466796875, 21.206222534179688, 17.072433471679688, 1.393829345703125, -1.378509521484375, 12.664535522460938, 3.0389633178710938, 0.11977577209472656, 11.893875122070312, 2.7593727111816406, 1.2110729217529297, 16.812286376953125, 1.3246917724609375, 3.058856964111328, 54.455108642578125, 12.529403686523438, 11.927375793457031, -0.8235702514648438, -10.121078491210938, 11.705703735351562, 2.4241714477539062, -4.948997497558594, 5.533332824707031, 3.8033370971679688, -6.927131652832031, 2.2256927490234375, 6.558258056640625, -5.4430999755859375, -1.01885986328125, 1.5549449920654297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000061.npy"}
|
|
{"epoch": 0.08957415565345081, "step": 62, "batch_size": 64, "mean": 5.499805450439453, "std": 6.936376094818115, "min": -8.996490478515625, "p10": -3.003179740905761, "median": 5.13128662109375, "p90": 14.154901123046878, "max": 28.890579223632812, "pos_frac": 0.84375, "sample": [9.003116607666016, 1.6455764770507812, 0.7761669158935547, 10.323074340820312, 6.068868637084961, 10.900482177734375, 6.562097549438477, 1.0193099975585938, 9.349903106689453, -5.1210479736328125, 14.410964965820312, 6.057609558105469, 1.6878089904785156, -0.0146484375, 8.993354797363281, 3.3260040283203125, 8.367385864257812, 5.361785888671875, 0.1278858184814453, 5.016670227050781, 6.46234130859375, 7.9366302490234375, 2.15887451171875, 5.3757476806640625, -3.8869361877441406, 8.1944580078125, 4.265529632568359, 8.010459899902344, -0.2246856689453125, 28.890579223632812, 16.691795349121094, 8.456916809082031, 5.882011413574219, 2.148487091064453, 1.84344482421875, 5.137208938598633, 0.8885040283203125, 17.214065551757812, -8.996490478515625, -3.4571990966796875, -5.567962646484375, 0.7689971923828125, 1.7340965270996094, 0.9676780700683594, 1.6198654174804688, 8.594802856445312, 16.041057586669922, 9.489139556884766, -3.3240909576416016, 13.557418823242188, 7.954708099365234, 5.125364303588867, -5.48211669921875, 3.6958446502685547, 12.654045104980469, 3.7667465209960938, 3.1468658447265625, 4.196601867675781, 25.408279418945312, 19.344924926757812, 6.805614471435547, 0.5625190734863281, -2.2543869018554688, 6.3274383544921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000062.npy"}
|
|
{"epoch": 0.09104258443465492, "step": 63, "batch_size": 64, "mean": 7.43485164642334, "std": 7.748790264129639, "min": -12.339393615722656, "p10": -0.5918756484985345, "median": 6.6623382568359375, "p90": 16.004951477050785, "max": 31.493316650390625, "pos_frac": 0.890625, "sample": [24.23431396484375, 7.519203186035156, 4.742738723754883, 8.539382934570312, 3.8345947265625, 1.3347930908203125, 4.507438659667969, -6.236824035644531, 5.747215270996094, 6.350563049316406, -12.339393615722656, 14.681617736816406, 8.640708923339844, 7.121337890625, 2.881816864013672, -0.8816051483154297, 16.4527587890625, 4.45867919921875, 12.641952514648438, 20.511940002441406, 12.194160461425781, 14.088623046875, 6.0806732177734375, 0.08415985107421875, -1.110443115234375, 11.849573135375977, -4.0507965087890625, 0.18497657775878906, 4.8161163330078125, 3.1766223907470703, 11.362747192382812, 7.466850280761719, 6.974113464355469, 9.316669464111328, 10.71197509765625, 11.058425903320312, 13.605865478515625, 13.590217590332031, 5.225292205810547, 2.1175613403320312, 0.1864776611328125, 14.960067749023438, 2.362062454223633, 5.944160461425781, -9.474884033203125, 12.911916732788086, 7.1904144287109375, 1.4167633056640625, 2.63201904296875, 6.308837890625, 2.47760009765625, 13.363876342773438, 16.982498168945312, 8.055606842041016, 17.372879028320312, 1.0458984375, 8.574163436889648, 31.493316650390625, -3.312164306640625, 14.626800537109375, 5.10992431640625, 25.587623596191406, 5.616004943847656, 10.912010192871094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000063.npy"}
|
|
{"epoch": 0.09251101321585903, "step": 64, "batch_size": 64, "mean": 8.647798538208008, "std": 9.687501907348633, "min": -15.902725219726562, "p10": -0.5983268737792966, "median": 7.064637184143066, "p90": 20.69824676513672, "max": 45.84747314453125, "pos_frac": 0.8125, "sample": [0.5482559204101562, 17.798309326171875, 25.020370483398438, 7.587436676025391, 5.377939224243164, 11.613136291503906, 3.248392105102539, 15.307937622070312, -0.7014026641845703, 6.535608291625977, 1.8784904479980469, 10.280288696289062, 1.1004905700683594, 5.059196472167969, 11.267829895019531, 29.569717407226562, 4.2610931396484375, -1.539459228515625, 17.565887451171875, 7.131200790405273, 11.149417877197266, -2.7390518188476562, 27.9649658203125, 8.099987030029297, -0.14403724670410156, 6.284080505371094, -0.7950839996337891, 6.594072341918945, -15.902725219726562, -0.23607254028320312, 7.490806579589844, -0.31134796142578125, 45.84747314453125, 5.341520309448242, 1.9637012481689453, 9.67251205444336, 16.676071166992188, 20.236190795898438, 10.755401611328125, 6.113658905029297, 6.0205078125, 22.59807586669922, 10.574729919433594, 8.247352600097656, -0.34857177734375, -8.972000122070312, 23.737335205078125, 3.5614013671875, 20.896270751953125, 0.512237548828125, 13.81268310546875, 12.791412353515625, 12.320640563964844, -1.4083175659179688, 12.476203918457031, -0.3578166961669922, 5.336067199707031, 6.998073577880859, 13.070571899414062, 12.223068237304688, 1.7231674194335938, 11.227142333984375, 18.64368438720703, 4.80291748046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000064.npy"}
|
|
{"epoch": 0.09397944199706314, "step": 65, "batch_size": 64, "mean": 8.72087287902832, "std": 9.570690155029297, "min": -21.67119598388672, "p10": -0.9569469451904293, "median": 7.41032600402832, "p90": 21.971057891845707, "max": 30.466827392578125, "pos_frac": 0.84375, "sample": [30.466827392578125, 24.398056030273438, 16.84210205078125, 13.427871704101562, 5.40620231628418, 8.901473999023438, 0.7904815673828125, 12.853523254394531, 3.2370471954345703, 20.11968231201172, 15.117576599121094, 1.4248504638671875, -0.3220329284667969, 22.394775390625, 26.645278930664062, 1.0547294616699219, 9.800048828125, 2.5714664459228516, -4.562141418457031, 4.8486785888671875, 1.0724849700927734, 17.210952758789062, 5.9335479736328125, -21.67119598388672, 16.076934814453125, 8.403701782226562, -3.6001129150390625, 7.430339813232422, 12.955520629882812, 4.953624725341797, 5.9239044189453125, 6.532413482666016, -2.180828094482422, 5.048667907714844, 27.235336303710938, 7.455623626708984, 20.982383728027344, 15.236900329589844, 24.88452911376953, 7.390312194824219, -1.0902099609375, -11.140289306640625, 9.023025512695312, 2.1015548706054688, 7.107513427734375, 17.676589965820312, 23.53333282470703, -1.0811691284179688, 5.412689208984375, 19.134239196777344, 12.140472412109375, 4.207366943359375, 19.341354370117188, -0.6670951843261719, 11.52703857421875, 7.55389404296875, 5.742988586425781, 1.8737030029296875, 17.117767333984375, 11.019287109375, -0.2923164367675781, 7.934133529663086, 0.1371917724609375, 7.131307601928711], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000065.npy"}
|
|
{"epoch": 0.09544787077826726, "step": 66, "batch_size": 64, "mean": 8.126091003417969, "std": 9.668362617492676, "min": -8.4381103515625, "p10": -0.7879472732543942, "median": 6.606025695800781, "p90": 19.169573593139653, "max": 45.73918151855469, "pos_frac": 0.84375, "sample": [7.4155731201171875, 45.73918151855469, 1.4171981811523438, 14.6307373046875, 13.282318115234375, 11.64923095703125, 1.9528541564941406, -0.15094757080078125, 10.240056991577148, 6.5942840576171875, -7.453155517578125, 2.3410816192626953, 5.998233795166016, -0.9534912109375, 12.207660675048828, 3.3265304565429688, 4.450613021850586, 0.15003013610839844, 2.1835670471191406, 19.465438842773438, 7.503379821777344, 10.230422973632812, 0.5877113342285156, 10.603385925292969, 16.922706604003906, 2.5684432983398438, 3.7482757568359375, -2.651885986328125, -3.9842300415039062, -8.4381103515625, 9.537971496582031, 17.624855041503906, 18.47922134399414, 7.320396423339844, 3.2715320587158203, 11.39520263671875, 6.617767333984375, -0.40167808532714844, 2.00067138671875, 14.094474792480469, 10.204109191894531, 3.4092941284179688, -0.26153564453125, 7.473030090332031, 5.767023086547852, -7.7455902099609375, 9.878868103027344, 1.884664535522461, -2.888763427734375, 13.952812194824219, 3.8601226806640625, 20.360122680664062, 26.265426635742188, 21.123023986816406, 4.358848571777344, 17.35022735595703, 1.6889228820800781, 7.717342376708984, 15.066520690917969, 2.4561824798583984, 32.522430419921875, 1.4632186889648438, 29.74249267578125, 12.903533935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000066.npy"}
|
|
{"epoch": 0.09691629955947137, "step": 67, "batch_size": 64, "mean": 8.405787467956543, "std": 9.644529342651367, "min": -12.236480712890625, "p10": -1.0120384216308589, "median": 6.348476409912109, "p90": 20.069532012939455, "max": 45.02313232421875, "pos_frac": 0.859375, "sample": [21.604873657226562, 3.2599945068359375, 21.428436279296875, 1.2414398193359375, 2.2302207946777344, -0.40283203125, 15.406295776367188, 5.383354187011719, -6.2746429443359375, 2.3457794189453125, 17.016338348388672, 9.605018615722656, 3.7919387817382812, 15.874488830566406, -0.5295333862304688, -4.499063491821289, 10.192535400390625, 8.664985656738281, 1.3498611450195312, -12.236480712890625, 20.046722412109375, 8.674118041992188, 0.23089599609375, 0.5065460205078125, 5.2785186767578125, 2.62591552734375, 20.079307556152344, 6.660734176635742, -1.2188262939453125, 6.8415069580078125, 4.2195281982421875, 13.845218658447266, 6.5012054443359375, 2.3004283905029297, 2.5805206298828125, 9.813240051269531, 14.815679550170898, 5.693117141723633, 1.444122314453125, -5.2248382568359375, 4.903388977050781, 17.507659912109375, 4.2989501953125, 3.5573043823242188, 26.3236083984375, 14.8209228515625, 6.725593566894531, 17.542648315429688, 13.943870544433594, 14.075119018554688, 12.621345520019531, 5.367536544799805, 11.873052597045898, 31.924026489257812, 6.195747375488281, 1.3681964874267578, 45.02313232421875, 3.4608497619628906, -2.4450531005859375, 24.97303009033203, 19.245452880859375, 6.878856658935547, -1.9604034423828125, 8.578893661499023], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000067.npy"}
|
|
{"epoch": 0.09838472834067548, "step": 68, "batch_size": 64, "mean": 8.072154998779297, "std": 8.153580665588379, "min": -6.998992919921875, "p10": 0.3214328765869142, "median": 5.979337692260742, "p90": 19.700840759277344, "max": 36.738006591796875, "pos_frac": 0.90625, "sample": [-3.6640625, 3.652801513671875, 23.621551513671875, 7.900733947753906, 5.293611526489258, -5.7846527099609375, 8.975982666015625, 22.6048583984375, 0.6870956420898438, 0.4654121398925781, 14.175018310546875, 15.27362060546875, 16.293479919433594, 11.702301025390625, 10.895004272460938, 0.25972747802734375, 5.895664215087891, 12.24459457397461, 3.3386306762695312, 8.350082397460938, -0.36865997314453125, 4.828617095947266, 5.336675643920898, 3.617431640625, 7.124357223510742, 13.16754150390625, 18.457046508789062, 8.25030517578125, 8.555416107177734, 19.207916259765625, 19.912094116210938, 21.684139251708984, 1.3344955444335938, 6.205635070800781, 9.185264587402344, 1.2225379943847656, -0.21123123168945312, 5.577281951904297, 2.389312744140625, 4.8373565673828125, 16.480838775634766, 2.320476531982422, 10.546075820922852, -6.998992919921875, 6.669517517089844, 6.063011169433594, 3.807188034057617, 21.862464904785156, 2.6302413940429688, 1.623983383178711, 1.8057308197021484, 36.738006591796875, 24.90520477294922, 16.833831787109375, 12.512321472167969, 1.6516189575195312, 2.026599884033203, 5.029022216796875, 11.394081115722656, -3.0075607299804688, 4.580535888671875, 2.3401145935058594, 4.8917694091796875, 7.416877746582031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000068.npy"}
|
|
{"epoch": 0.09985315712187959, "step": 69, "batch_size": 64, "mean": 9.67041301727295, "std": 9.418717384338379, "min": -4.722747802734375, "p10": 0.0623584747314459, "median": 7.1080474853515625, "p90": 20.281210327148443, "max": 44.1767578125, "pos_frac": 0.890625, "sample": [4.4684906005859375, 6.465248107910156, 30.120574951171875, 8.937858581542969, 12.120025634765625, 15.844223022460938, -4.09429931640625, -0.5853118896484375, -0.19494247436523438, 3.73406982421875, 5.9187774658203125, 8.81119155883789, -0.6080322265625, 8.472541809082031, 7.3455657958984375, 14.54812240600586, 25.194732666015625, 6.8705291748046875, 16.350540161132812, 15.438972473144531, -0.2808876037597656, 23.747177124023438, 12.977874755859375, 10.494316101074219, 18.37139129638672, 13.622344970703125, 4.451881408691406, 16.06311798095703, 13.044410705566406, 5.219278335571289, 1.0956459045410156, 0.8082466125488281, 19.082977294921875, 44.1767578125, 4.7476959228515625, 19.067306518554688, 5.495124816894531, 16.570419311523438, 16.198516845703125, 1.098541259765625, 1.6050033569335938, 16.353363037109375, 11.65713119506836, 1.2287826538085938, -0.7091197967529297, 14.70095443725586, 6.318058013916016, 35.88185119628906, 4.1750030517578125, 7.7617950439453125, 6.451959609985352, -4.722747802734375, 5.191642761230469, 3.0335311889648438, 2.0637969970703125, 7.3499298095703125, 20.79473876953125, 21.328826904296875, 1.126129150390625, 18.326004028320312, 1.7182979583740234, 1.3261795043945312, 4.071573257446289, 0.6627273559570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000069.npy"}
|
|
{"epoch": 0.1013215859030837, "step": 70, "batch_size": 64, "mean": 10.396703720092773, "std": 11.003621101379395, "min": -8.4107666015625, "p10": -1.315313339233398, "median": 10.282015800476074, "p90": 25.77352981567384, "max": 40.713653564453125, "pos_frac": 0.84375, "sample": [6.428897857666016, 0.6703338623046875, 11.002208709716797, 19.273094177246094, 22.232864379882812, 12.152116775512695, -7.4710693359375, -1.5034027099609375, 13.275657653808594, 3.963357925415039, -2.3240280151367188, 12.771682739257812, -2.7922935485839844, 7.983978271484375, 37.64056396484375, 1.6192893981933594, 10.767333984375, 27.814071655273438, 10.787099838256836, 40.713653564453125, 11.644287109375, 10.231094360351562, 2.04412841796875, 29.138031005859375, 0.35724639892578125, 17.29912567138672, 19.07410430908203, 13.48277473449707, -2.133970260620117, 6.899261474609375, 5.860191345214844, 26.901145935058594, 2.198272705078125, 1.0438880920410156, 0.9333267211914062, -0.31858062744140625, 13.420032501220703, 3.0422821044921875, 11.263755798339844, -1.7005367279052734, 3.2923736572265625, 4.054994583129883, -0.7750396728515625, 2.265960693359375, 10.332937240600586, 1.372772216796875, 19.607139587402344, 23.142425537109375, -8.4107666015625, 5.110801696777344, 18.126487731933594, 2.8402481079101562, 1.7899208068847656, 31.935714721679688, 13.03537368774414, -0.8764381408691406, 34.9466552734375, 10.391742706298828, 20.765281677246094, 16.605297088623047, 3.789947509765625, 21.908714294433594, 13.348594665527344, 21.10259246826172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000070.npy"}
|
|
{"epoch": 0.1027900146842878, "step": 71, "batch_size": 64, "mean": 12.112241744995117, "std": 11.967255592346191, "min": -9.733453750610352, "p10": 0.6317092895507823, "median": 10.262733459472656, "p90": 22.87934188842774, "max": 50.334136962890625, "pos_frac": 0.90625, "sample": [-5.991004943847656, 41.4300537109375, 6.083122253417969, 1.7551422119140625, 5.620021820068359, 12.496971130371094, 6.4303741455078125, 12.5518798828125, 18.104339599609375, 5.586124420166016, 22.15936279296875, 6.751131057739258, 17.442840576171875, 4.637535095214844, 3.258056640625, 7.3160400390625, 11.253231048583984, 15.637237548828125, -0.45610809326171875, 2.0313262939453125, 4.701324462890625, 43.79667663574219, 7.513542175292969, 7.209846496582031, 16.316566467285156, 16.243331909179688, 6.092311859130859, 12.431777954101562, 13.376556396484375, -8.195388793945312, 4.5580596923828125, 14.596343994140625, 10.497528076171875, -0.049747467041015625, 5.559572219848633, 18.188087463378906, 18.62957763671875, 0.150238037109375, 21.493301391601562, 16.000045776367188, 5.092769622802734, 13.006973266601562, 6.730583190917969, 18.216796875, 15.205093383789062, 17.09040069580078, 4.519775390625, 36.26173400878906, 46.865814208984375, 50.334136962890625, 2.0182342529296875, 23.187904357910156, -9.733453750610352, -0.783843994140625, 12.962287902832031, 6.22125244140625, 7.918392181396484, 11.371402740478516, 19.50146484375, 15.433868408203125, 31.03338623046875, 3.3791427612304688, 6.114236831665039, 10.027938842773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000071.npy"}
|
|
{"epoch": 0.10425844346549193, "step": 72, "batch_size": 64, "mean": 12.90890121459961, "std": 13.027764320373535, "min": -14.447647094726562, "p10": -1.8090076446533174, "median": 10.432766914367676, "p90": 32.45179443359377, "max": 55.10162353515625, "pos_frac": 0.890625, "sample": [18.439292907714844, 15.646295547485352, 5.970466613769531, 13.530593872070312, 6.318809509277344, 3.736591339111328, 9.056621551513672, 27.251113891601562, -14.447647094726562, 26.1080322265625, 14.92232894897461, 14.509300231933594, 7.164880752563477, 1.229644775390625, 17.04193115234375, 9.050636291503906, 11.70550537109375, 25.16802215576172, 15.288047790527344, 34.30552673339844, 38.87931823730469, -3.039876937866211, 8.178764343261719, 10.502222061157227, 17.028961181640625, 3.8289947509765625, 18.719703674316406, 9.827003479003906, 10.363311767578125, 17.272804260253906, 4.169380187988281, 5.576812744140625, 11.584671020507812, -12.766754150390625, 35.05076599121094, 28.512008666992188, 19.290969848632812, 7.473583221435547, 23.155364990234375, -3.8061294555664062, 9.000347137451172, 40.4381103515625, 55.10162353515625, -7.013721466064453, 9.899419784545898, 25.906661987304688, 7.6982421875, 8.082996368408203, 1.0630207061767578, 34.14027404785156, -9.37518310546875, 4.411746978759766, 2.331939697265625, 17.824981689453125, 14.171035766601562, 19.901260375976562, 9.580459594726562, 15.931346893310547, 35.342681884765625, 11.992801666259766, 6.681583404541016, 2.6603469848632812, -3.2868194580078125, 1.8866195678710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000072.npy"}
|
|
{"epoch": 0.10572687224669604, "step": 73, "batch_size": 64, "mean": 12.989622116088867, "std": 18.848773956298828, "min": -25.713653564453125, "p10": -9.154793548583983, "median": 10.167367935180664, "p90": 33.94851570129395, "max": 66.59498596191406, "pos_frac": 0.78125, "sample": [31.648056030273438, -14.090553283691406, 7.7367095947265625, 14.853591918945312, 6.9399871826171875, 35.390174865722656, -5.939430236816406, 25.850082397460938, 7.2023162841796875, -0.8545875549316406, 28.021697998046875, 33.70450210571289, 24.407196044921875, -1.3648529052734375, 0.4633636474609375, -14.498870849609375, 56.970489501953125, -1.6250762939453125, 10.965616226196289, 19.342453002929688, 17.51782989501953, 7.981895446777344, 10.462608337402344, 1.2534828186035156, 2.073984146118164, 9.872127532958984, -19.467723846435547, 11.083992004394531, 30.03521728515625, 15.642921447753906, 13.245994567871094, 3.256053924560547, 59.775482177734375, -12.721023559570312, 5.107540130615234, 30.950782775878906, 30.610984802246094, -10.522281646728516, 4.643833160400391, 2.6187305450439453, -1.7927093505859375, -6.4478302001953125, -9.917045593261719, 7.894250869750977, 17.42810821533203, 16.402442932128906, 2.7447071075439453, 5.386499404907227, 22.358299255371094, 43.65901184082031, 14.2357177734375, 6.047477722167969, 23.071868896484375, 5.911460876464844, 14.303550720214844, -7.3762054443359375, 15.219955444335938, 57.90202331542969, 28.408554077148438, 66.59498596191406, 3.1644020080566406, 34.05309295654297, 19.25152587890625, -25.713653564453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000073.npy"}
|
|
{"epoch": 0.10719530102790015, "step": 74, "batch_size": 64, "mean": 14.162808418273926, "std": 20.921667098999023, "min": -39.02606964111328, "p10": -3.30047149658203, "median": 10.677483558654785, "p90": 33.21564483642579, "max": 123.06634521484375, "pos_frac": 0.78125, "sample": [27.081329345703125, 9.379585266113281, 12.572189331054688, -0.13346290588378906, 24.977989196777344, 7.699287414550781, 10.225311279296875, 56.44910430908203, 2.757322311401367, 16.476051330566406, 16.064285278320312, 10.346296310424805, 19.976734161376953, 1.6292266845703125, 17.502166748046875, 123.06634521484375, 8.920394897460938, 30.93457794189453, 15.452404022216797, 3.4169769287109375, 1.8914241790771484, 51.89317321777344, 3.6048049926757812, 34.19324493408203, 25.816162109375, 9.918388366699219, -3.7947921752929688, 0.186431884765625, -0.5462436676025391, -18.973121643066406, 15.166000366210938, -1.6249237060546875, -0.24641990661621094, 9.293281555175781, -39.02606964111328, 21.629501342773438, -2.1470565795898438, 26.81167984008789, 4.966470718383789, -3.9042625427246094, -10.526718139648438, 11.008670806884766, 19.733322143554688, 24.947113037109375, -0.314727783203125, 46.189727783203125, 14.832405090332031, 3.519439697265625, 9.826736450195312, 8.82857894897461, 11.339317321777344, 37.092041015625, 18.87548828125, 23.567230224609375, 38.45885467529297, 16.273056030273438, 5.309474945068359, 16.648460388183594, 28.340240478515625, 26.863723754882812, -0.08452224731445312, -3.931365966796875, -4.740856170654297, 14.462291717529297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000074.npy"}
|
|
{"epoch": 0.10866372980910426, "step": 75, "batch_size": 64, "mean": 20.119970321655273, "std": 20.181682586669922, "min": -11.9459228515625, "p10": -2.753778839111326, "median": 16.025936126708984, "p90": 49.341851806640626, "max": 74.392822265625, "pos_frac": 0.859375, "sample": [-0.48046112060546875, 24.164260864257812, 17.85321807861328, 14.60455322265625, 19.107749938964844, 12.249128341674805, 10.54758071899414, 6.589424133300781, 46.185791015625, 66.9130859375, 32.906494140625, 15.329681396484375, -7.2509002685546875, 27.612159729003906, 74.392822265625, 15.277469635009766, 41.3836669921875, 63.26518249511719, 9.27808952331543, -0.29730224609375, 44.17158508300781, 24.711990356445312, 15.113079071044922, 10.83709716796875, 16.828903198242188, 49.44410705566406, 49.10325622558594, 3.455608367919922, 8.531078338623047, 32.4765739440918, 19.178443908691406, 17.300029754638672, 4.266265869140625, 64.48870849609375, -5.4083099365234375, -3.728057861328125, 25.522857666015625, -11.9459228515625, 21.02277374267578, 20.236507415771484, 6.583301544189453, 12.576194763183594, -8.759651184082031, 8.551956176757812, -7.708229064941406, 29.109588623046875, 2.7033538818359375, 11.665176391601562, 39.231353759765625, 26.643890380859375, 3.8924636840820312, -7.063385009765625, 8.226726531982422, 7.446254730224609, 16.783626556396484, 5.517303466796875, 7.1454620361328125, 23.27716064453125, 15.705535888671875, 16.346336364746094, 38.09386444091797, 58.536956787109375, 61.168365478515625, 16.766189575195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000075.npy"}
|
|
{"epoch": 0.11013215859030837, "step": 76, "batch_size": 64, "mean": 13.126352310180664, "std": 15.859684944152832, "min": -43.87799072265625, "p10": -2.080088424682617, "median": 12.94638442993164, "p90": 31.209701919555666, "max": 67.75559997558594, "pos_frac": 0.859375, "sample": [14.063547134399414, 6.083927154541016, 2.390779495239258, 25.433120727539062, 3.0202407836914062, 17.866668701171875, 10.76446533203125, 26.95245361328125, 31.499492645263672, 14.050605773925781, 15.842185974121094, 21.90645980834961, 10.301044464111328, 31.94068145751953, 4.572021484375, 5.695985794067383, 8.842195510864258, 67.75559997558594, 33.66449737548828, 5.796516418457031, -6.587127685546875, 12.939277648925781, 0.1433238983154297, 12.9534912109375, -15.207931518554688, 6.166769027709961, 5.474334716796875, 15.979141235351562, 6.545967102050781, 7.840000152587891, 30.23401641845703, 18.862831115722656, 22.580902099609375, 9.034568786621094, 28.410667419433594, 4.344287872314453, -2.1120529174804688, 26.588729858398438, 3.5011749267578125, -10.747642517089844, 6.835247039794922, 1.9568767547607422, -6.21251106262207, -6.759193420410156, 16.806541442871094, 30.533523559570312, 49.56782531738281, 28.31353759765625, 8.622058868408203, 5.7047576904296875, 13.440086364746094, -2.005504608154297, 15.887855529785156, 16.846343994140625, 14.897727966308594, 13.197029113769531, -0.5048179626464844, 13.826675415039062, 36.339332580566406, 23.580604553222656, 22.518264770507812, 32.926361083984375, 12.258720397949219, -43.87799072265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000076.npy"}
|
|
{"epoch": 0.11160058737151249, "step": 77, "batch_size": 64, "mean": 21.019004821777344, "std": 21.917591094970703, "min": -39.15985107421875, "p10": 0.5307651519775398, "median": 21.517589569091797, "p90": 55.26313018798828, "max": 75.76898193359375, "pos_frac": 0.90625, "sample": [16.345901489257812, 43.56529998779297, 32.6035041809082, 41.46567916870117, 41.81415557861328, 0.22499847412109375, 58.254119873046875, 21.875125885009766, 8.999139785766602, 10.212451934814453, 6.216306686401367, 34.47508239746094, 39.15325927734375, 5.858966827392578, 3.5500030517578125, 25.18639373779297, -39.15985107421875, 21.853591918945312, 34.440330505371094, 31.401657104492188, 14.017309188842773, -2.562877655029297, 9.834915161132812, 1.2442207336425781, 3.1591339111328125, 55.48548889160156, 3.3585987091064453, 25.887046813964844, 11.581657409667969, -5.037473678588867, 32.503360748291016, 29.579559326171875, 13.41384506225586, 64.93733215332031, 33.86470031738281, 10.094268798828125, 56.602325439453125, 75.76898193359375, 21.18158721923828, 12.412490844726562, -32.74198913574219, 8.003238677978516, -14.82598876953125, 3.674306869506836, 12.446332931518555, 26.149131774902344, 32.030364990234375, 25.43684959411621, 25.358154296875, 26.559242248535156, 7.326038360595703, -6.2017822265625, 5.246013641357422, 25.511322021484375, 26.537330627441406, 23.40985107421875, 4.1486968994140625, 56.34556579589844, 68.55601501464844, 11.759902954101562, 23.25936508178711, 54.744293212890625, 20.69204330444336, 6.12939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000077.npy"}
|
|
{"epoch": 0.1130690161527166, "step": 78, "batch_size": 64, "mean": 16.267372131347656, "std": 18.734283447265625, "min": -21.639328002929688, "p10": -2.868338775634766, "median": 16.143047332763672, "p90": 41.60837936401368, "max": 83.21652221679688, "pos_frac": 0.8125, "sample": [3.1621551513671875, 1.6321048736572266, 40.356414794921875, 17.689781188964844, -1.3661537170410156, 46.44624328613281, 17.050804138183594, 83.21652221679688, 27.280685424804688, 22.857471466064453, 20.506954193115234, 8.103256225585938, -4.681011199951172, 14.697866439819336, 22.53701400756836, 20.41244888305664, -0.6624603271484375, 8.296985626220703, -5.882743835449219, 2.7755355834960938, 7.185747146606445, 18.25221061706543, 26.009685516357422, 23.217025756835938, 19.650157928466797, 14.570819854736328, 17.29617691040039, 18.572021484375, 22.692108154296875, 5.740257263183594, 42.144935607910156, 16.126482009887695, 5.7281494140625, -11.174488067626953, 25.656112670898438, -12.3145751953125, 53.363311767578125, 20.808738708496094, 43.48942565917969, -0.7083663940429688, 37.6179084777832, 3.5963668823242188, 48.86537170410156, 32.13457107543945, 29.663665771484375, 0.0113525390625, 10.883384704589844, 3.890901565551758, 12.257793426513672, 7.869106292724609, 6.10560417175293, -1.2686634063720703, 16.15961265563965, -2.8496627807617188, 31.300765991210938, -21.639328002929688, 51.164031982421875, 4.656410217285156, -17.9432373046875, 26.673614501953125, -2.8763427734375, 35.431884765625, 2.1403732299804688, 26.530487060546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000078.npy"}
|
|
{"epoch": 0.1145374449339207, "step": 79, "batch_size": 64, "mean": 17.853469848632812, "std": 22.85171127319336, "min": -22.199539184570312, "p10": -6.32409324645996, "median": 12.758567810058594, "p90": 50.00017852783205, "max": 74.46176147460938, "pos_frac": 0.734375, "sample": [-6.6805572509765625, 54.370826721191406, 6.288034439086914, 68.26095581054688, 4.780067443847656, -0.017612457275390625, -2.187040328979492, -10.574920654296875, 43.16815185546875, 37.866119384765625, 19.382843017578125, -22.199539184570312, 24.846214294433594, 70.52488708496094, 9.883743286132812, 25.215530395507812, 73.96607971191406, 51.89088439941406, 17.52076530456543, -9.978042602539062, 74.46176147460938, 17.163360595703125, -3.3879852294921875, -11.224884033203125, 28.303497314453125, 1.2109088897705078, -1.06622314453125, 16.396942138671875, 13.720909118652344, 62.42327117919922, 3.5626068115234375, 8.643274307250977, 11.796226501464844, 27.24681854248047, -11.431915283203125, 10.730751037597656, -0.8031196594238281, 6.532829284667969, 41.050445556640625, 40.881744384765625, 19.60312271118164, 31.88372039794922, 33.57475280761719, 19.938907623291016, 26.287277221679688, -5.35188102722168, 2.9867401123046875, 31.968521118164062, 37.73186492919922, 10.841547012329102, 15.402921676635742, -5.492343902587891, -1.4475860595703125, 38.822540283203125, -1.2151947021484375, 21.963531494140625, 5.289371490478516, 6.645597457885742, 0.17804718017578125, -8.819786071777344, 7.315328598022461, 45.588531494140625, -3.592235565185547, 19.980056762695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000079.npy"}
|
|
{"epoch": 0.11600587371512482, "step": 80, "batch_size": 64, "mean": 16.589031219482422, "std": 23.739376068115234, "min": -23.32978057861328, "p10": -6.744988250732422, "median": 11.739740371704102, "p90": 42.00306777954101, "max": 95.39871215820312, "pos_frac": 0.78125, "sample": [27.0631103515625, 8.628719329833984, -6.574180603027344, 24.553672790527344, 5.648332595825195, 3.875608444213867, -3.0653419494628906, -9.591171264648438, 31.862136840820312, 2.718414306640625, 4.2288970947265625, 16.393421173095703, -15.920093536376953, -20.669090270996094, -6.470558166503906, 23.90381622314453, 95.39871215820312, 38.334938049316406, 83.09039306640625, 45.858184814453125, 25.155059814453125, 6.899394989013672, 41.87947082519531, 42.05603790283203, 34.377655029296875, 15.633132934570312, -0.9210186004638672, 32.96031188964844, 14.778018951416016, 10.72784423828125, 2.6252670288085938, -16.656204223632812, 4.264385223388672, -6.8181915283203125, 25.254852294921875, -23.32978057861328, 59.36228942871094, 31.237144470214844, -7.815166473388672, 88.42425537109375, 10.50459098815918, 3.6124267578125, 2.609844207763672, 2.0628509521484375, 45.32946014404297, 12.751636505126953, 23.80097198486328, 29.197982788085938, 16.15595817565918, 10.394485473632812, -3.8429737091064453, -3.0315113067626953, 0.7374668121337891, 6.8107757568359375, 23.409643173217773, 22.914627075195312, 6.015281677246094, 3.6497802734375, 32.96954345703125, 24.66059112548828, 14.164260864257812, 38.125244140625, -5.619632720947266, 14.952003479003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000080.npy"}
|
|
{"epoch": 0.11747430249632893, "step": 81, "batch_size": 64, "mean": 23.198793411254883, "std": 28.119565963745117, "min": -29.968536376953125, "p10": -9.227621459960936, "median": 19.40678596496582, "p90": 58.72273712158203, "max": 117.6217041015625, "pos_frac": 0.859375, "sample": [42.82927703857422, 32.93511962890625, -29.968536376953125, 40.11962127685547, 24.856311798095703, 58.77317810058594, 19.073810577392578, -11.949832916259766, 7.721519470214844, 33.43229675292969, 31.794525146484375, -22.632064819335938, 2.4100189208984375, 24.98345947265625, 87.9620361328125, 10.45986557006836, 23.469390869140625, 37.747650146484375, 58.90673828125, -16.127647399902344, 1.1670722961425781, 9.294677734375, 21.65593719482422, 72.5998764038086, 23.331951141357422, 11.99435806274414, 18.45184326171875, 8.183902740478516, 7.34625244140625, -8.127281188964844, 26.188507080078125, 7.778289794921875, 4.855442047119141, 12.917877197265625, 29.455703735351562, 117.6217041015625, 42.42374038696289, 19.739761352539062, 2.9915924072265625, 58.60504150390625, 42.881988525390625, 49.0244140625, 16.047454833984375, 0.3250160217285156, 4.903873443603516, -2.608234405517578, -19.584491729736328, 14.870384216308594, 14.510276794433594, 30.605636596679688, 14.933551788330078, 12.898683547973633, 21.005409240722656, -14.111274719238281, 38.79112243652344, 46.27384948730469, 109.5013427734375, 60.099609375, 9.869392395019531, 34.79712677001953, 3.2554168701171875, 27.297286987304688, -9.699195861816406, 33.5611572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000081.npy"}
|
|
{"epoch": 0.11894273127753303, "step": 82, "batch_size": 64, "mean": 18.392139434814453, "std": 30.29347801208496, "min": -60.26139831542969, "p10": -16.237848663330073, "median": 16.736846923828125, "p90": 51.392615509033206, "max": 123.43014526367188, "pos_frac": 0.796875, "sample": [-8.479690551757812, 32.81327819824219, -19.816696166992188, 18.020000457763672, 21.05221939086914, 18.589815139770508, -26.352272033691406, 16.160350799560547, 26.013015747070312, 58.01152801513672, 69.05517578125, 13.27166748046875, -12.815853118896484, 7.8688812255859375, 15.747331619262695, 106.04322814941406, 13.496414184570312, -53.749542236328125, -9.511993408203125, 34.006378173828125, 2.0990982055664062, 32.77320098876953, 51.88971710205078, 3.7428951263427734, -17.704418182373047, -6.064605712890625, 3.1017074584960938, 23.702537536621094, 21.500328063964844, 13.140655517578125, 20.294532775878906, 36.46649169921875, 37.821022033691406, -0.9211330413818359, 74.00370788574219, 19.011310577392578, 50.23271179199219, 15.658184051513672, 9.329959869384766, 5.248382568359375, -1.9413833618164062, 29.207626342773438, 59.76763916015625, -18.295028686523438, 29.145736694335938, 22.42981719970703, 7.350439071655273, 47.183719635009766, 34.190528869628906, -21.693878173828125, 10.607933044433594, 7.405242919921875, -60.26139831542969, 7.503929138183594, 32.71094512939453, 22.168649673461914, 5.980155944824219, 26.651901245117188, 16.906890869140625, 123.43014526367188, 16.566802978515625, 18.16412925720215, 0.39875030517578125, 46.76811981201172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000082.npy"}
|
|
{"epoch": 0.12041116005873716, "step": 83, "batch_size": 64, "mean": 21.821531295776367, "std": 26.633377075195312, "min": -26.29471206665039, "p10": -9.775568389892575, "median": 17.994450569152832, "p90": 63.7740417480469, "max": 86.46343994140625, "pos_frac": 0.8125, "sample": [21.23792266845703, 25.225269317626953, 18.212329864501953, 72.4052734375, 45.61009979248047, 12.691444396972656, -7.2711639404296875, 70.67916870117188, 57.564788818359375, 5.479972839355469, 12.366058349609375, 17.77657127380371, 1.625396728515625, -4.7454376220703125, 18.517200469970703, 69.85057067871094, -14.021102905273438, 86.46343994140625, 11.208343505859375, 74.82930755615234, 38.56803894042969, 12.165901184082031, 51.995025634765625, -20.25049591064453, -26.29471206665039, -10.848884582519531, 55.3824462890625, -20.844276428222656, 17.381826400756836, 66.43515014648438, 11.962875366210938, 15.194900512695312, 5.542562484741211, 1.08258056640625, 25.47808074951172, 13.412559509277344, 26.605609893798828, -0.40076637268066406, 40.164649963378906, 13.441452026367188, 20.505035400390625, 18.599029541015625, 44.525482177734375, 72.72866821289062, 19.55359649658203, 48.59779357910156, 4.814891815185547, -19.05902862548828, -2.5124073028564453, 2.5320968627929688, -22.407882690429688, 51.556640625, 9.509025573730469, 42.44886779785156, 24.74822235107422, 24.05926513671875, -6.645111083984375, 8.128803253173828, 18.430484771728516, 32.09977722167969, 43.93415832519531, 17.71289825439453, 28.198829650878906, 2.638957977294922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000083.npy"}
|
|
{"epoch": 0.12187958883994127, "step": 84, "batch_size": 64, "mean": 19.046039581298828, "std": 20.50779151916504, "min": -43.45240783691406, "p10": -1.5618135452270507, "median": 16.34085178375244, "p90": 42.84121398925782, "max": 81.66996765136719, "pos_frac": 0.84375, "sample": [7.456657409667969, 15.85369873046875, 4.027252197265625, 5.429876327514648, 47.873748779296875, 5.2699737548828125, 5.1595916748046875, 20.702049255371094, 16.286108016967773, 9.114418029785156, 30.01663589477539, 35.931785583496094, -20.667938232421875, 14.792831420898438, 22.204795837402344, 30.675552368164062, 10.655660629272461, 81.66996765136719, 31.11532974243164, 2.502410888671875, 7.323518753051758, 29.939682006835938, 7.963737487792969, 6.621232986450195, -0.8724784851074219, 41.718994140625, 26.030540466308594, 32.45860290527344, -43.45240783691406, -1.5957927703857422, 54.53254699707031, 3.4486007690429688, 36.661014556884766, 16.39559555053711, 18.458770751953125, 7.379974365234375, 48.26905822753906, 43.26324462890625, 31.77587890625, 21.88916778564453, 2.9420394897460938, 32.000389099121094, -4.131219863891602, 15.8614501953125, 29.9552001953125, 29.424171447753906, -1.4825286865234375, -2.9820022583007812, 34.24287414550781, 33.51531982421875, -2.1777191162109375, 41.856475830078125, 55.77879333496094, 20.089561462402344, 61.8057861328125, 15.37628173828125, 30.54730987548828, 8.017967224121094, -16.560516357421875, 31.01915740966797, 12.933588027954102, 18.90399169921875, 8.015308380126953, -0.2850017547607422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000084.npy"}
|
|
{"epoch": 0.12334801762114538, "step": 85, "batch_size": 64, "mean": 19.524394989013672, "std": 24.67206573486328, "min": -30.72986602783203, "p10": -4.865496826171874, "median": 17.839513778686523, "p90": 58.81364364624025, "max": 69.93470764160156, "pos_frac": 0.71875, "sample": [43.24617004394531, 15.173118591308594, 30.307952880859375, 3.6665401458740234, -2.1275177001953125, 22.78853988647461, 26.041854858398438, 2.024242401123047, 60.335655212402344, 9.329032897949219, -2.4419727325439453, 4.633811950683594, 64.74588012695312, 25.21088409423828, 24.500120162963867, 21.58791732788086, -4.3871002197265625, -18.06635284423828, 20.407638549804688, 0.8235092163085938, -30.72986602783203, 69.93470764160156, 18.398971557617188, -1.9404850006103516, -4.45452880859375, 45.774322509765625, 7.496772766113281, -8.602096557617188, 18.726211547851562, 10.707748413085938, 47.55085754394531, -5.0416259765625, -17.561553955078125, -1.992776870727539, 14.205482482910156, 5.30012321472168, 64.85150146484375, 17.416973114013672, 38.1866455078125, -0.38374900817871094, -4.340639114379883, 69.42839050292969, -2.1169185638427734, 45.28724670410156, 60.8873291015625, 15.146453857421875, 55.26228332519531, 66.48115539550781, 10.15484619140625, 5.929767608642578, 32.19794464111328, 49.828163146972656, -0.3976593017578125, -7.397773742675781, 18.262054443359375, 19.166093826293945, 53.40986633300781, 21.183258056640625, 30.273330688476562, 36.93819046020508, -22.373626708984375, 18.783985137939453, -1.2527732849121094, 43.176734924316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000085.npy"}
|
|
{"epoch": 0.12481644640234948, "step": 86, "batch_size": 64, "mean": 20.703155517578125, "std": 37.65457534790039, "min": -57.59767150878906, "p10": -15.5985050201416, "median": 15.043901443481445, "p90": 65.94494781494141, "max": 145.01016235351562, "pos_frac": 0.734375, "sample": [-13.245994567871094, 18.06822967529297, 68.34669494628906, -19.80025863647461, 35.565956115722656, 30.266990661621094, 27.25310707092285, -2.085662841796875, 104.5968017578125, 11.836467742919922, 6.667461395263672, -5.265556335449219, 0.9765472412109375, 21.861907958984375, -10.36920166015625, 19.172042846679688, 48.55000305175781, 19.797500610351562, -2.893230438232422, -50.9091796875, 63.17657470703125, 21.81039810180664, 32.03121566772461, 10.037843704223633, 10.662353515625, -16.60672378540039, -3.9500885009765625, 67.13139343261719, -4.553836822509766, 7.92957878112793, 38.23799514770508, 18.611080169677734, 7.853107452392578, -57.59767150878906, 45.41259765625, 54.230682373046875, 27.65264892578125, -39.70655059814453, 74.01417541503906, 3.0904598236083984, 145.01016235351562, 18.329814910888672, 25.312454223632812, 12.019573211669922, -4.640106201171875, 127.56109619140625, 6.7181243896484375, 121.38037109375, 46.76622009277344, -7.0804443359375, -31.41704559326172, 39.18089294433594, 9.268716812133789, 34.862281799316406, 29.446151733398438, 36.11820602416992, 36.93616485595703, 5.519510269165039, 1.936309814453125, 4.37293815612793, -5.105993270874023, -17.91211700439453, 2.7682037353515625, 19.79253387451172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000086.npy"}
|
|
{"epoch": 0.1262848751835536, "step": 87, "batch_size": 64, "mean": 24.061767578125, "std": 32.99917984008789, "min": -73.2835693359375, "p10": -7.940727233886718, "median": 20.0267276763916, "p90": 64.77524414062502, "max": 127.81517028808594, "pos_frac": 0.8125, "sample": [23.0114803314209, 36.17436218261719, 0.5291957855224609, 9.531679153442383, 37.4595947265625, -8.342071533203125, 29.73540496826172, -0.5953998565673828, 2.0038833618164062, -3.789093017578125, -24.208953857421875, 45.84275817871094, -6.848777770996094, 66.58474731445312, 30.67266082763672, 58.89936828613281, 52.91761779785156, 10.684965133666992, 90.90704345703125, 58.91326904296875, 20.773529052734375, 19.600357055664062, 7.7924957275390625, 40.95630645751953, 39.86400604248047, 2.2185306549072266, 38.590782165527344, 127.81517028808594, 13.819068908691406, 2.9153366088867188, 13.078126907348633, 74.799072265625, 59.27046203613281, 9.73178482055664, 56.400238037109375, 25.68871307373047, 13.724794387817383, 39.66257095336914, 20.236194610595703, 73.24845886230469, 0.469879150390625, 78.78927612304688, 15.110977172851562, 19.8172607421875, -73.2835693359375, 11.469369888305664, 40.30597686767578, -34.644317626953125, 38.23097229003906, 69.9071044921875, -16.858325958251953, -16.69973373413086, 8.690122604370117, 19.53759002685547, -7.0042572021484375, 60.553070068359375, 31.70806884765625, 33.82264709472656, -37.393341064453125, 23.588951110839844, 15.700706481933594, 1.275796890258789, 51.68479919433594, -5.095623016357422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000087.npy"}
|
|
{"epoch": 0.1277533039647577, "step": 88, "batch_size": 64, "mean": 18.192121505737305, "std": 31.818187713623047, "min": -58.583587646484375, "p10": -8.680522155761718, "median": 11.36335277557373, "p90": 58.73482818603519, "max": 129.30908203125, "pos_frac": 0.78125, "sample": [10.224441528320312, 11.42181396484375, 63.96421813964844, 61.884857177734375, 34.086021423339844, 48.17205810546875, 3.3592491149902344, 61.86534118652344, -7.1773529052734375, 12.955507278442383, 32.64154052734375, 7.1772308349609375, 1.87890625, 47.44628143310547, -22.886306762695312, 31.2918701171875, -20.653106689453125, 15.631332397460938, -46.85285949707031, -58.583587646484375, -5.046075820922852, 75.2921142578125, 36.299278259277344, -4.58367919921875, 44.19892883300781, 5.627523422241211, 27.74560546875, -9.324737548828125, 22.666311264038086, 21.77788543701172, 40.009063720703125, 7.001564025878906, -1.5423259735107422, 27.148513793945312, 1.4930572509765625, 93.34625244140625, 10.446189880371094, 51.4302978515625, 4.588714599609375, 129.30908203125, 5.139991760253906, 44.13361358642578, 4.150993347167969, 14.280397415161133, 10.7442626953125, 23.163299560546875, 1.4702262878417969, -3.0640907287597656, 33.19969177246094, 80.47262573242188, 34.438819885253906, 15.671539306640625, 0.33881378173828125, 9.858978271484375, -52.10382080078125, -5.515167236328125, 10.650510787963867, -1.496225357055664, 11.304891586303711, 15.174318313598633, 18.529584884643555, -20.8260498046875, 8.832901000976562, 40.0146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000088.npy"}
|
|
{"epoch": 0.12922173274596183, "step": 89, "batch_size": 64, "mean": 20.458881378173828, "std": 27.08057975769043, "min": -63.39544677734375, "p10": -2.3469511032104484, "median": 16.435709953308105, "p90": 48.460848236084, "max": 118.79606628417969, "pos_frac": 0.859375, "sample": [4.9568634033203125, 1.6433849334716797, 5.5114288330078125, 20.6496639251709, -9.456489562988281, 49.845855712890625, 40.244667053222656, 16.482765197753906, -8.296409606933594, 57.05327606201172, 57.53154754638672, 22.950695037841797, 22.395309448242188, 45.229164123535156, 36.02806854248047, 29.785987854003906, 12.929405212402344, 10.256210327148438, -2.654356002807617, 19.687702178955078, 14.682510375976562, 2.941469192504883, 16.388654708862305, 12.060955047607422, -10.548477172851562, 1.8405494689941406, 1.50531005859375, 10.335538864135742, 73.69931030273438, 18.852218627929688, 27.50757598876953, 88.91009521484375, 16.35584259033203, 44.343101501464844, 21.61831283569336, 2.6314544677734375, 12.59054183959961, 118.79606628417969, 4.1060333251953125, -1.6296730041503906, 29.07931900024414, -0.19501876831054688, 21.228456497192383, 30.302013397216797, 16.342485427856445, 5.827545166015625, 9.076013565063477, 4.76251220703125, 4.34454345703125, 34.912513732910156, 20.910430908203125, 40.481903076171875, -2.827014923095703, 16.355384826660156, 42.99437713623047, -63.39544677734375, -25.269058227539062, 27.961708068847656, 17.987323760986328, 21.840099334716797, 24.157012939453125, 24.09351348876953, 91.61164855957031, 7.024101257324219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000089.npy"}
|
|
{"epoch": 0.13069016152716592, "step": 90, "batch_size": 64, "mean": 27.817413330078125, "std": 41.238189697265625, "min": -50.24147033691406, "p10": -10.123895263671871, "median": 18.199609756469727, "p90": 91.09896850585939, "max": 146.428955078125, "pos_frac": 0.796875, "sample": [65.9423828125, -43.147315979003906, 29.53270721435547, 93.29824829101562, 22.42001724243164, 35.416656494140625, 1.0525474548339844, -4.303424835205078, 58.286102294921875, -1.235076904296875, 45.01789093017578, 17.991649627685547, -0.597442626953125, 13.036109924316406, 38.01982116699219, 85.89848327636719, -4.4951934814453125, 146.428955078125, 52.01654052734375, 2.077442169189453, 2.9022769927978516, 10.125221252441406, 18.407569885253906, -11.869773864746094, 115.00048828125, 16.36245346069336, 24.875343322753906, 119.29403686523438, 4.821964263916016, 22.580867767333984, 136.6070556640625, 21.89220428466797, -25.841476440429688, 11.60478401184082, -6.050178527832031, -50.24147033691406, 27.388652801513672, 7.786643981933594, 61.811676025390625, 5.967201232910156, 13.880104064941406, 10.892776489257812, -23.046463012695312, -5.079343795776367, 20.845199584960938, 85.96731567382812, 99.3575439453125, -36.156715393066406, -30.239364624023438, 11.779045104980469, 35.540809631347656, 1.602987289428711, 18.552024841308594, 36.084144592285156, 68.13015747070312, 32.36791229248047, 10.42060661315918, 9.920938491821289, 60.42730712890625, 16.423503875732422, 23.676429748535156, 16.715429306030273, 95.27911376953125, 40.890281677246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000090.npy"}
|
|
{"epoch": 0.13215859030837004, "step": 91, "batch_size": 64, "mean": 23.0252685546875, "std": 33.885406494140625, "min": -33.904457092285156, "p10": -19.02871170043945, "median": 21.440811157226562, "p90": 68.12580261230472, "max": 120.80810546875, "pos_frac": 0.75, "sample": [17.821449279785156, 52.671844482421875, -32.603302001953125, 7.594470977783203, 72.44447326660156, 0.35076332092285156, -17.22887420654297, 43.7656364440918, 3.952951431274414, 20.2908935546875, 41.91484069824219, 39.504127502441406, 42.356117248535156, 13.92926025390625, -33.904457092285156, 107.83404541015625, -12.0364990234375, 2.0270767211914062, 8.569793701171875, 85.50306701660156, 9.975479125976562, 33.254459381103516, 120.80810546875, -16.625167846679688, 27.79150390625, 53.64177703857422, -19.42737579345703, 3.0146522521972656, 22.43218994140625, -5.525032043457031, -18.098495483398438, 22.47066879272461, 58.04890441894531, -21.12652587890625, 18.450759887695312, 21.403648376464844, 55.375343322753906, -13.673408508300781, 21.47797393798828, -5.166284561157227, 8.136070251464844, 24.702529907226562, 57.96284484863281, 1.968231201171875, -19.44367218017578, 36.784088134765625, 22.96764373779297, 78.34574890136719, 25.92394256591797, 41.223121643066406, 11.054145812988281, -25.454673767089844, 0.9416294097900391, 27.75762176513672, -2.131345748901367, 42.226966857910156, -0.5609607696533203, 84.34805297851562, 44.4744873046875, 23.98858642578125, 51.59953308105469, -24.77149200439453, 47.558837890625, 80.75432586669922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000091.npy"}
|
|
{"epoch": 0.13362701908957417, "step": 92, "batch_size": 64, "mean": 23.284170150756836, "std": 33.0507698059082, "min": -46.34049987792969, "p10": -11.360004043579101, "median": 16.738669395446777, "p90": 57.28439636230469, "max": 125.57803344726562, "pos_frac": 0.75, "sample": [-4.7585906982421875, 4.934959411621094, 8.110305786132812, 46.139060974121094, -8.745025634765625, 6.2264251708984375, -46.34049987792969, -15.754194259643555, 46.300193786621094, 24.220977783203125, 65.45367431640625, 0.30835914611816406, 42.79931640625, 2.509021759033203, -5.533561706542969, 109.36367797851562, 54.794952392578125, 125.57803344726562, -40.26580810546875, -6.1319427490234375, 25.964466094970703, -10.820293426513672, 27.165435791015625, -8.706546783447266, -17.721229553222656, 57.28565979003906, -11.59130859375, 72.49307250976562, 29.399635314941406, 5.537885665893555, 71.7579574584961, -2.3123321533203125, 34.612449645996094, 0.6806011199951172, 51.48616027832031, 8.450759887695312, 31.590896606445312, 103.15449523925781, 9.895013809204102, 57.28144836425781, 42.569091796875, 56.6328125, -0.513336181640625, -5.04229736328125, 8.934860229492188, 24.68175506591797, 4.902801513671875, 40.418731689453125, 34.10607147216797, 11.50296401977539, 0.24170684814453125, -12.842924118041992, 53.0255012512207, 15.889856338500977, 17.587482452392578, 13.823692321777344, 35.32127380371094, 51.35826110839844, 42.08381652832031, 23.9010009765625, 11.168386459350586, -12.502151489257812, 53.90874481201172, 34.21510314941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000092.npy"}
|
|
{"epoch": 0.13509544787077826, "step": 93, "batch_size": 64, "mean": 23.431915283203125, "std": 29.27066421508789, "min": -35.55768585205078, "p10": -3.6619720458984357, "median": 16.33733367919922, "p90": 57.50738983154297, "max": 148.38644409179688, "pos_frac": 0.828125, "sample": [19.574256896972656, 9.071245193481445, 26.094207763671875, 62.110801696777344, 57.701271057128906, 54.18620300292969, 5.0331268310546875, 22.677082061767578, 43.506797790527344, -13.58868408203125, 72.74752807617188, 18.88079833984375, 50.78375244140625, 50.43920135498047, 148.38644409179688, 13.521808624267578, -1.7630767822265625, 5.8647918701171875, 3.7880630493164062, 86.82678985595703, -8.590980529785156, 12.735280990600586, 21.355754852294922, -1.00311279296875, 33.735504150390625, 10.159286499023438, 29.253150939941406, 6.034049987792969, -1.5503311157226562, 12.145431518554688, -0.6153812408447266, 14.217948913574219, 9.322376251220703, 20.29745864868164, 33.306129455566406, 52.2052001953125, 65.33577728271484, -23.35546875, -4.4757843017578125, 48.49732971191406, 43.69341278076172, 35.03335952758789, 35.207489013671875, 6.61163330078125, 10.376115798950195, 8.921089172363281, 30.08367919921875, 2.60626220703125, 57.05500030517578, 0.5389251708984375, -7.378211975097656, 58.473915100097656, 6.464546203613281, 16.479415893554688, 5.759532928466797, 6.0829315185546875, -18.796241760253906, 31.365150451660156, 44.824668884277344, 34.866798400878906, 45.86094665527344, 16.19525146484375, -35.55768585205078, 0.022504806518554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000093.npy"}
|
|
{"epoch": 0.13656387665198239, "step": 94, "batch_size": 64, "mean": 26.934350967407227, "std": 30.795570373535156, "min": -22.453811645507812, "p10": -6.913602066040039, "median": 23.203824996948242, "p90": 67.63926315307617, "max": 110.75592041015625, "pos_frac": 0.78125, "sample": [26.09854507446289, -16.73436737060547, 43.32524108886719, 19.300399780273438, 6.5215301513671875, -14.409011840820312, 10.135879516601562, 60.51530075073242, 5.559183120727539, -20.316253662109375, 23.094301223754883, 12.8502197265625, 25.348861694335938, 6.0890350341796875, 11.519477844238281, 0.33825111389160156, 57.90008544921875, 12.387874603271484, -6.805004119873047, 110.75592041015625, 5.734806060791016, 30.09549331665039, 31.81047821044922, 53.598724365234375, 77.78530883789062, 23.543731689453125, 13.931114196777344, 29.23589324951172, 21.193851470947266, -3.0518417358398438, -3.859579086303711, 17.08903694152832, 35.29004669189453, 57.733154296875, 43.330501556396484, 18.712779998779297, 36.46467590332031, 67.68115997314453, 33.963958740234375, 23.3133487701416, 5.440196990966797, 44.438148498535156, -4.180870056152344, -16.6304931640625, 67.54150390625, 73.68475341796875, -6.762174606323242, 59.25282287597656, 110.21200561523438, -6.96014404296875, -22.453811645507812, 86.46659851074219, 31.180625915527344, 56.62451171875, -3.423431396484375, 17.254608154296875, -14.742599487304688, -5.630268096923828, 35.929420471191406, 14.845657348632812, 35.81330490112305, 56.97346496582031, 72.02386474609375, 49.82861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000094.npy"}
|
|
{"epoch": 0.13803230543318648, "step": 95, "batch_size": 64, "mean": 21.93939208984375, "std": 29.070507049560547, "min": -71.04444885253906, "p10": -6.787653350830078, "median": 16.472951889038086, "p90": 61.81281585693362, "max": 91.09779357910156, "pos_frac": 0.828125, "sample": [16.764007568359375, 42.38111114501953, 4.593009948730469, 22.328731536865234, 19.258821487426758, 69.20269775390625, 8.305618286132812, 6.470293045043945, -7.810333251953125, 15.6015625, 19.48042106628418, 4.437076568603516, 34.972347259521484, 80.47328186035156, 50.47126770019531, -6.070899963378906, 38.38386535644531, 2.987760543823242, 15.095125198364258, 45.889556884765625, 22.92870330810547, 39.874237060546875, 20.871179580688477, 15.120698928833008, 9.76392936706543, -11.409622192382812, 10.418594360351562, 52.124977111816406, 20.229473114013672, 28.824607849121094, -9.99542236328125, -7.0948333740234375, 53.61882019042969, 91.09779357910156, 75.32830810546875, 56.58811950683594, 35.798126220703125, 1.6161365509033203, 1.1468124389648438, 64.05197143554688, 45.96049118041992, 80.39376831054688, 70.55656433105469, 3.7011566162109375, 14.330053329467773, 8.44035530090332, -1.6381053924560547, 31.910913467407227, 29.1630859375, -71.04444885253906, 2.5819149017333984, 2.741220474243164, -29.938507080078125, 46.7188720703125, 43.20276641845703, 13.173274993896484, 40.22686767578125, 12.051851272583008, 16.751842498779297, 16.194061279296875, -5.108545303344727, -2.726198196411133, 15.366989135742188, -33.00695037841797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000095.npy"}
|
|
{"epoch": 0.1395007342143906, "step": 96, "batch_size": 64, "mean": 29.912555694580078, "std": 35.41636657714844, "min": -54.19017028808594, "p10": -0.3928052902221679, "median": 20.77998924255371, "p90": 78.78724136352541, "max": 153.5980987548828, "pos_frac": 0.875, "sample": [62.63063049316406, 17.93891143798828, 37.97425842285156, 31.139026641845703, 20.309677124023438, 62.575836181640625, 6.3060150146484375, 2.9877243041992188, 1.511606216430664, 25.747894287109375, 109.98143005371094, 16.359771728515625, 10.283645629882812, -3.985057830810547, 1.9467201232910156, 21.250301361083984, 29.115890502929688, 44.40338134765625, 86.90303802490234, 88.80293273925781, 9.590248107910156, 44.56421661376953, 35.99413299560547, 153.5980987548828, 73.36640167236328, 15.213672637939453, 21.547828674316406, 10.927650451660156, 15.148712158203125, 16.33538055419922, 9.651969909667969, 134.56228637695312, 32.99229431152344, 28.90064239501953, 5.647361755371094, 87.57562255859375, 57.08331298828125, 6.119758605957031, 36.40824890136719, 1.9552059173583984, 45.71538543701172, 27.20539665222168, 22.849403381347656, -9.663284301757812, 14.782184600830078, -0.32114410400390625, -0.5985870361328125, 39.68346405029297, -0.42351722717285156, 25.734588623046875, 12.238510131835938, 57.16307067871094, -14.140003204345703, 6.887176513671875, 64.78916931152344, 10.637527465820312, 17.293725967407227, 62.14717102050781, 81.11045837402344, 2.0589447021484375, -0.7487354278564453, 31.18657684326172, -54.19017028808594, 1.6395950317382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000096.npy"}
|
|
{"epoch": 0.14096916299559473, "step": 97, "batch_size": 64, "mean": 23.000316619873047, "std": 27.659061431884766, "min": -59.801918029785156, "p10": -7.976758956909177, "median": 21.988481521606445, "p90": 55.99185256958008, "max": 106.21640014648438, "pos_frac": 0.78125, "sample": [17.332717895507812, 52.07530212402344, 2.108795166015625, 1.05291748046875, -4.562038421630859, -2.96453857421875, 15.600582122802734, 27.91274070739746, 2.085773468017578, 47.263267517089844, 22.99441146850586, 40.72913360595703, -0.8938007354736328, 18.086483001708984, 78.79345703125, 60.490264892578125, -20.352935791015625, 34.22294616699219, 0.792816162109375, 13.401906967163086, 27.08056640625, 6.262630462646484, 15.770389556884766, -21.438217163085938, 70.1673583984375, 20.035852432250977, 25.445831298828125, 30.172897338867188, 41.165977478027344, 14.752330780029297, 27.56029510498047, -5.603794097900391, 17.435211181640625, -11.565359115600586, 48.076499938964844, 41.0870361328125, 49.044639587402344, 29.113170623779297, -59.801918029785156, -8.993743896484375, 47.02106475830078, 31.930805206298828, -11.358016967773438, 7.497951507568359, 54.20087432861328, 35.41409683227539, 1.5048542022705078, 21.009923934936523, 22.967039108276367, 17.222640991210938, -4.352994918823242, 56.75941467285156, -9.333574295043945, 49.67306900024414, -1.0345115661621094, 62.46173095703125, -1.1292343139648438, 43.80635070800781, 48.712486267089844, 63.144622802734375, 32.58882522583008, 9.45937728881836, 106.21640014648438, 25.699172973632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000097.npy"}
|
|
{"epoch": 0.14243759177679882, "step": 98, "batch_size": 64, "mean": 26.44017791748047, "std": 36.65676498413086, "min": -101.72747802734375, "p10": -12.75349197387695, "median": 28.312896728515625, "p90": 67.85121002197266, "max": 125.05584716796875, "pos_frac": 0.796875, "sample": [41.42652893066406, 5.962932586669922, 66.78483581542969, 43.311866760253906, 6.542634963989258, 20.344873428344727, 23.466598510742188, 26.53246307373047, 29.332717895507812, 7.453620910644531, 30.89134407043457, -7.100128173828125, 20.486011505126953, 23.88842010498047, 66.75914001464844, 47.830169677734375, -14.136043548583984, 3.5022735595703125, -0.771881103515625, -22.81591796875, 40.42275619506836, 35.336021423339844, -29.774612426757812, -37.50923156738281, 55.179595947265625, 53.69546890258789, 30.965011596679688, 68.3082275390625, 68.83314514160156, 26.68596839904785, 40.85108947753906, -2.7983779907226562, 60.826744079589844, 125.05584716796875, -101.72747802734375, 83.72453308105469, 58.30641174316406, 27.293075561523438, 46.29362487792969, 17.76038360595703, -9.527538299560547, 40.364501953125, -55.86943054199219, 25.67687225341797, 38.75177001953125, 3.4631881713867188, 39.450469970703125, 81.88424682617188, 60.865753173828125, 36.63277053833008, 15.87774658203125, 9.15826416015625, 72.52142333984375, 103.7564697265625, 34.94605255126953, 31.70120620727539, 30.214981079101562, 13.365728378295898, 3.9307022094726562, -9.110126495361328, 54.43163299560547, -3.7505416870117188, 3.5270042419433594, -17.51245880126953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000098.npy"}
|
|
{"epoch": 0.14390602055800295, "step": 99, "batch_size": 64, "mean": 32.21813201904297, "std": 40.565940856933594, "min": -31.99970245361328, "p10": -7.269179534912109, "median": 22.191299438476562, "p90": 81.61805419921875, "max": 173.73838806152344, "pos_frac": 0.828125, "sample": [-0.6377658843994141, -8.479316711425781, 6.205972671508789, 20.387920379638672, 4.1863861083984375, 41.619651794433594, 26.099334716796875, 29.313217163085938, 106.23239135742188, 20.75615119934082, 23.06281280517578, 121.65399169921875, 8.592586517333984, -29.396011352539062, 81.75663757324219, -3.95361328125, 22.76660919189453, 37.236968994140625, -1.1230201721191406, 17.637657165527344, 72.8218994140625, 15.014175415039062, 11.031999588012695, 158.7764892578125, 9.591379165649414, 58.24620056152344, 2.7319412231445312, -24.764259338378906, 10.673341751098633, -31.99970245361328, -9.279716491699219, 53.19493103027344, 38.422157287597656, 25.43295669555664, 63.97300720214844, 173.73838806152344, 73.73570251464844, 36.65651321411133, 17.342864990234375, 19.46051025390625, 4.920806884765625, 30.082393646240234, 38.04676818847656, 81.29469299316406, 99.9983139038086, 5.2559661865234375, 12.476791381835938, 65.97190856933594, -6.095176696777344, 0.9330234527587891, 32.08808898925781, 86.35372924804688, 30.25063705444336, 10.37130355834961, 55.11131286621094, -7.7723236083984375, 41.88068771362305, 14.165655136108398, 65.66644287109375, 51.096282958984375, 21.615989685058594, 46.863502502441406, 3.4609031677246094, -20.79657745361328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000099.npy"}
|
|
{"epoch": 0.14537444933920704, "step": 100, "batch_size": 64, "mean": 18.931367874145508, "std": 35.162384033203125, "min": -73.22457885742188, "p10": -19.56691341400146, "median": 19.68370819091797, "p90": 63.28380966186525, "max": 139.45697021484375, "pos_frac": 0.765625, "sample": [53.64103698730469, 30.61054801940918, -6.158710479736328, 15.226375579833984, 22.705699920654297, -7.70367431640625, 33.02729797363281, 25.38829803466797, 7.2391815185546875, 39.7296142578125, 11.170814514160156, 0.9943103790283203, 37.77886962890625, 23.474334716796875, 4.55540657043457, 77.47323608398438, 19.58533477783203, -44.935516357421875, -29.993545532226562, -52.59576416015625, -14.095590591430664, -10.029121398925781, 1.4527606964111328, -61.118839263916016, 55.58650588989258, 20.049644470214844, 49.388824462890625, 24.28375244140625, 1.300058364868164, 6.270502090454102, 42.87212371826172, -3.2955169677734375, 44.85387420654297, 5.197620391845703, 66.98622131347656, 30.422821044921875, 8.690240859985352, 11.774772644042969, 60.065711975097656, 24.753570556640625, 40.19865036010742, 15.683296203613281, 9.116348266601562, 1.0902347564697266, 29.44281005859375, 34.283172607421875, -8.081817626953125, -26.36656951904297, -6.627403259277344, 66.0183334350586, 35.42095184326172, 68.95565032958984, 24.338937759399414, -73.22457885742188, 139.45697021484375, -10.896635055541992, 17.088531494140625, 77.97445678710938, -21.911766052246094, 64.66299438476562, 45.26934814453125, 7.203521728515625, 36.106971740722656, 19.782081604003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000100.npy"}
|
|
{"epoch": 0.14684287812041116, "step": 101, "batch_size": 64, "mean": 23.0167293548584, "std": 39.5931396484375, "min": -51.72026824951172, "p10": -19.86616401672363, "median": 18.58244228363037, "p90": 82.38601226806648, "max": 132.80038452148438, "pos_frac": 0.6875, "sample": [95.43736267089844, 132.80038452148438, 30.541885375976562, 29.69957733154297, -13.151481628417969, 101.09708404541016, 106.13153076171875, 10.024703979492188, 89.53326416015625, 106.57405090332031, -35.298828125, 27.36260986328125, -37.23778533935547, 46.80702209472656, 12.906875610351562, -8.178672790527344, -21.251953125, 13.656991958618164, 59.97425079345703, 27.936134338378906, -14.314773559570312, 24.8126220703125, 50.92439270019531, 61.785804748535156, 18.868553161621094, 18.28673553466797, 8.941410064697266, 63.60047149658203, 113.84121704101562, 24.666015625, -7.04522705078125, -6.296430587768555, -13.736297607421875, 51.37710952758789, 18.29633140563965, 37.82289123535156, 65.70909118652344, 43.64143371582031, -7.235443115234375, 6.312381744384766, -2.2173023223876953, -16.63265609741211, 19.40552520751953, -7.233543395996094, 2.0147628784179688, -13.574407577514648, 33.471031188964844, 32.03117370605469, 8.863716125488281, 24.366079330444336, -51.72026824951172, 34.97963333129883, 2.2058258056640625, -29.627208709716797, 25.120899200439453, 17.102643966674805, -13.169567108154297, 38.752418518066406, 6.465324401855469, -1.5305824279785156, 50.967926025390625, -27.647396087646484, 38.523277282714844, -33.46989440917969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000101.npy"}
|
|
{"epoch": 0.14831130690161526, "step": 102, "batch_size": 64, "mean": 16.883743286132812, "std": 37.09708023071289, "min": -73.98828125, "p10": -21.15103149414062, "median": 9.967472076416016, "p90": 55.19505081176759, "max": 133.6350860595703, "pos_frac": 0.671875, "sample": [2.756593704223633, 39.22419738769531, 56.769508361816406, 10.657058715820312, 5.15057373046875, 7.034645080566406, -17.816635131835938, -43.193817138671875, 21.9775447845459, -70.7513427734375, 26.1451416015625, 85.02302551269531, -10.89431381225586, 37.84125518798828, 27.104183197021484, -8.636688232421875, 9.277885437011719, 33.54490661621094, 48.701683044433594, 0.3158149719238281, -14.013946533203125, -3.869699478149414, 46.15477752685547, -5.029417037963867, 56.18043518066406, 21.65985107421875, 36.060874938964844, 133.6350860595703, -3.5846481323242188, 0.3329620361328125, 107.65235137939453, -0.3299751281738281, 36.91121292114258, 41.05817413330078, 3.7324047088623047, -15.843521118164062, 31.015737533569336, -73.98828125, 0.537750244140625, -27.321060180664062, 1.2662277221679688, -42.03143310546875, -22.142959594726562, -3.2259597778320312, 52.89582061767578, 28.0380859375, 41.15081024169922, 79.41586303710938, 42.36891174316406, 9.096054077148438, 66.92464447021484, -26.328628540039062, -5.9426116943359375, 7.349140167236328, 52.076637268066406, -18.836532592773438, 38.19255828857422, 32.48523712158203, 24.17308235168457, 36.17295837402344, -12.180953979492188, -17.192337036132812, 41.01044464111328, 44.642337799072266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000102.npy"}
|
|
{"epoch": 0.14977973568281938, "step": 103, "batch_size": 64, "mean": 33.756011962890625, "std": 40.74163055419922, "min": -34.49641418457031, "p10": -11.957221221923827, "median": 27.541614532470703, "p90": 78.47422332763672, "max": 174.6031494140625, "pos_frac": 0.796875, "sample": [-5.184345245361328, 8.287117004394531, 13.724794387817383, 32.87705993652344, -17.818222045898438, -23.816650390625, 33.03928756713867, 37.77623748779297, 13.706815719604492, 19.128284454345703, 27.710121154785156, 43.177093505859375, 16.502593994140625, 20.142532348632812, -2.1629791259765625, 64.95376586914062, 107.12918090820312, 35.38671875, 48.125511169433594, 59.09221649169922, -33.074432373046875, -11.998138427734375, 46.636077880859375, 27.37310791015625, -34.49641418457031, 73.60375213623047, 34.19061279296875, -16.43414306640625, 12.844833374023438, 19.51403045654297, 36.900657653808594, 158.03683471679688, 23.959640502929688, 19.130298614501953, 68.51431274414062, 65.43265533447266, -7.22515869140625, -11.861747741699219, 21.062963485717773, 17.218629837036133, -4.5569000244140625, 39.153167724609375, 60.14073181152344, -27.13763427734375, 21.797882080078125, 97.32389068603516, 77.82778930664062, 26.74835205078125, 174.6031494140625, 97.05746459960938, 101.77591705322266, 78.75126647949219, 11.769966125488281, 39.32244110107422, 65.17949676513672, -1.3384857177734375, 32.020957946777344, 4.157663345336914, 4.728275299072266, 49.72966766357422, 77.48888397216797, 16.516530990600586, 31.90618896484375, 44.31249237060547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000103.npy"}
|
|
{"epoch": 0.1512481644640235, "step": 104, "batch_size": 64, "mean": 32.083656311035156, "std": 46.35171127319336, "min": -62.71562194824219, "p10": -12.042958259582518, "median": 19.466812133789062, "p90": 88.77457122802736, "max": 183.248046875, "pos_frac": 0.75, "sample": [87.43367004394531, -7.685005187988281, 13.458240509033203, 14.482818603515625, 60.07841491699219, -4.0304718017578125, -25.836463928222656, 2.8988895416259766, -2.5141830444335938, 37.918479919433594, -12.594377517700195, 32.45500946044922, 144.2257080078125, 51.06584930419922, 83.61194610595703, -1.9167022705078125, 89.3492431640625, 31.881635665893555, -16.805702209472656, 111.6820068359375, 35.93046569824219, 6.592826843261719, 20.185192108154297, 8.165607452392578, 13.554815292358398, 2.280050277709961, 183.248046875, -43.34009552001953, 55.54338836669922, 129.1685791015625, 133.59683227539062, 28.94183349609375, 13.504217147827148, 64.18694305419922, 11.98689079284668, 14.013870239257812, -10.75631332397461, -21.46539306640625, 110.0950927734375, 15.19000244140625, 48.402915954589844, -4.89208984375, 66.69508361816406, -1.9049453735351562, 80.88896179199219, 38.302215576171875, 31.55756378173828, 50.07746505737305, 36.12687683105469, 29.332565307617188, 4.224554061889648, 87.32656860351562, 8.567329406738281, 27.099815368652344, 61.67625427246094, 65.65143585205078, -6.85029411315918, 27.6610164642334, -0.5220527648925781, 18.748432159423828, -21.520545959472656, 1.7035865783691406, -62.71562194824219, 7.935161590576172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000104.npy"}
|
|
{"epoch": 0.1527165932452276, "step": 105, "batch_size": 64, "mean": 45.658321380615234, "std": 40.5356559753418, "min": -62.70417785644531, "p10": 2.9770790100097666, "median": 42.83394241333008, "p90": 97.63106994628909, "max": 174.4459991455078, "pos_frac": 0.90625, "sample": [46.484588623046875, 31.374893188476562, 45.3277587890625, 28.131118774414062, 40.60984802246094, 42.707618713378906, 72.36871337890625, 107.63536834716797, 10.616388320922852, -62.70417785644531, -5.7277984619140625, 80.37422180175781, 81.59701538085938, 13.794866561889648, 34.28423309326172, 47.71176528930664, -32.13737487792969, 2.5869293212890625, -5.439666748046875, 106.0523910522461, 33.04206848144531, 102.48777770996094, 68.87461853027344, 20.180809020996094, 11.398361206054688, 80.91419219970703, 60.687767028808594, 42.96026611328125, 9.370172500610352, 37.58045196533203, 64.31044006347656, 158.11444091796875, 19.381866455078125, 50.74967956542969, 16.98822784423828, 17.433258056640625, 69.20417022705078, 89.32989501953125, 46.13714599609375, 16.375370025634766, 49.31647491455078, 90.08895874023438, 34.28777313232422, 37.9097900390625, 41.39543151855469, 51.27723693847656, -0.5826797485351562, 63.670257568359375, 67.90861511230469, 100.8634033203125, 35.057579040527344, 23.70117950439453, -17.80303955078125, 8.846160888671875, 63.22828674316406, 60.334228515625, 55.36261749267578, 106.416748046875, 58.5203857421875, 174.4459991455078, 24.85028839111328, 3.8874282836914062, 12.952526092529297, 75.02717590332031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000105.npy"}
|
|
{"epoch": 0.15418502202643172, "step": 106, "batch_size": 64, "mean": 28.547531127929688, "std": 35.17011260986328, "min": -53.068092346191406, "p10": -13.400667572021478, "median": 28.971057891845703, "p90": 74.28189849853516, "max": 131.30999755859375, "pos_frac": 0.84375, "sample": [28.161605834960938, 5.057191848754883, 7.942235946655273, -3.9114151000976562, 17.939029693603516, 1.3221149444580078, 49.726470947265625, -7.093025207519531, 30.84893035888672, 66.57478332519531, 35.872894287109375, 35.551414489746094, 3.0796852111816406, 73.329833984375, 24.83075714111328, 88.66896057128906, 6.20305061340332, 12.237190246582031, 74.68992614746094, 1.1338443756103516, 62.2325439453125, 40.28445816040039, 2.198486328125, 82.86776733398438, 23.668363571166992, 40.87659454345703, 7.636512756347656, -16.10394287109375, 52.480125427246094, -33.54309844970703, 46.10472106933594, 15.168281555175781, 39.68574142456055, 40.30021286010742, -2.951967239379883, 5.646217346191406, 23.022613525390625, 3.6230087280273438, 89.55967712402344, 62.62767791748047, 63.7762451171875, 24.24212646484375, -53.068092346191406, 4.471954345703125, 131.30999755859375, 35.606109619140625, 27.830551147460938, 34.30756378173828, 31.882369995117188, 7.4904632568359375, 0.6522083282470703, 56.400657653808594, 102.29015350341797, 33.94422912597656, 54.53515625, 56.02015686035156, -22.497879028320312, 31.528518676757812, -21.644927978515625, 29.78050994873047, 78.01158142089844, -40.64094543457031, -22.527626037597656, 45.79139709472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000106.npy"}
|
|
{"epoch": 0.15565345080763582, "step": 107, "batch_size": 64, "mean": 32.29783630371094, "std": 41.55763626098633, "min": -86.01530456542969, "p10": -12.304551887512197, "median": 21.553399085998535, "p90": 83.75872192382813, "max": 137.59239196777344, "pos_frac": 0.859375, "sample": [82.08799743652344, 107.64443969726562, 46.73741149902344, 66.89472961425781, 24.354652404785156, 29.903587341308594, 50.25151062011719, -2.9845027923583984, -86.01530456542969, 8.377645492553711, 16.00848960876465, 76.28114318847656, -35.12029266357422, 28.02914047241211, 9.740686416625977, 6.7650909423828125, 15.276006698608398, 84.47474670410156, 1.9118385314941406, -26.383071899414062, 102.90975189208984, 66.99842834472656, 78.0601806640625, 5.95469856262207, -17.422691345214844, 4.129585266113281, -1.3209152221679688, 5.2672882080078125, 137.59239196777344, 72.93347930908203, 18.95364761352539, 17.178680419921875, 94.00615692138672, 43.99012756347656, 64.34393310546875, 85.34720611572266, 16.445837020874023, 27.49126434326172, 16.506650924682617, -16.298858642578125, 61.68971252441406, -46.067230224609375, 63.436241149902344, 74.81829833984375, 11.529502868652344, 80.69651794433594, 1.3683013916015625, 78.99052429199219, 49.85325622558594, 51.15544891357422, 22.01342010498047, 21.0933780670166, 2.3251800537109375, -27.06578826904297, 0.1507110595703125, 5.6333160400390625, 13.048847198486328, 70.07275390625, 9.5869140625, 23.7579288482666, 64.23854064941406, 95.33023071289062, 2.4152069091796875, 9.687545776367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000107.npy"}
|
|
{"epoch": 0.15712187958883994, "step": 108, "batch_size": 64, "mean": 38.376766204833984, "std": 55.82282257080078, "min": -59.97906494140625, "p10": -22.16678638458252, "median": 27.378726959228516, "p90": 118.28107833862308, "max": 206.72198486328125, "pos_frac": 0.765625, "sample": [35.01667785644531, 130.7605438232422, 38.14289855957031, -45.9859619140625, -59.97906494140625, 102.48583984375, 30.345014572143555, 8.859207153320312, 206.72198486328125, -0.4571075439453125, -11.389389038085938, 121.38001251220703, 7.198951721191406, 5.43682861328125, -5.294862747192383, 72.4368667602539, -6.634426116943359, 93.36231994628906, 156.15333557128906, 103.9257583618164, 111.05023193359375, 79.66761779785156, 1.1192646026611328, -21.171072006225586, 46.559669494628906, 10.705940246582031, 26.806915283203125, 15.173015594482422, 71.71768188476562, 4.664264678955078, 25.40924835205078, 0.6609764099121094, 70.34345245361328, -22.593521118164062, 168.54122924804688, -37.46467590332031, 78.31620788574219, -29.354503631591797, -14.995559692382812, 4.394926071166992, 26.441795349121094, 6.647674560546875, 47.46037292480469, 147.97335815429688, 32.64592742919922, 9.387470245361328, 84.35376739501953, 70.85951232910156, 28.463579177856445, -49.9498291015625, 15.118026733398438, 50.44145965576172, 122.46530151367188, 61.006980895996094, -0.8505630493164062, -1.1794071197509766, 37.96160125732422, 15.973834991455078, 72.17156982421875, -48.17026138305664, 27.950538635253906, 41.58021926879883, 62.21165466308594, 23.1116943359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000108.npy"}
|
|
{"epoch": 0.15859030837004406, "step": 109, "batch_size": 64, "mean": 41.38175964355469, "std": 48.83089828491211, "min": -79.39309692382812, "p10": -11.662704467773436, "median": 34.84614181518555, "p90": 104.10176391601563, "max": 179.07785034179688, "pos_frac": 0.828125, "sample": [49.24090576171875, 7.899898529052734, 129.82408142089844, 83.11917877197266, 30.346473693847656, 53.33671188354492, -55.32289123535156, 11.685935974121094, 17.655517578125, 49.203826904296875, 128.27197265625, 41.336483001708984, 72.25675964355469, 50.93424987792969, 53.20403289794922, 5.752176284790039, 21.220890045166016, 36.360313415527344, -12.829113006591797, -4.99659538269043, -6.680812835693359, 179.07785034179688, 0.4922618865966797, -9.503936767578125, 26.001089096069336, 4.483406066894531, 122.02677917480469, 30.659151077270508, 32.434112548828125, -37.22293472290039, 62.26594543457031, 65.31752014160156, 8.958629608154297, 66.35487365722656, 55.8416748046875, 37.65691375732422, 96.78108215332031, 69.89212799072266, 104.73174285888672, 102.6318130493164, -0.7390365600585938, 12.418113708496094, -79.39309692382812, 29.40721893310547, 1.395263671875, 71.51016998291016, 32.74705505371094, 33.184322357177734, 19.12349510192871, 82.38372802734375, -46.870819091796875, 35.35858154296875, 35.89296340942383, 107.43870544433594, 124.49113464355469, 34.333702087402344, 84.4215316772461, 34.13954162597656, 86.75625610351562, 102.01927947998047, -12.587890625, -21.891372680664062, 3.340608596801758, 98.85311126708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000109.npy"}
|
|
{"epoch": 0.16005873715124816, "step": 110, "batch_size": 64, "mean": 37.423946380615234, "std": 52.068763732910156, "min": -94.5721435546875, "p10": -14.092668533325194, "median": 32.070075035095215, "p90": 107.83451538085939, "max": 176.31283569335938, "pos_frac": 0.828125, "sample": [114.00843811035156, 52.0699462890625, 22.999530792236328, 52.83551025390625, 56.32288360595703, 35.628929138183594, 48.04110336303711, 3.3814010620117188, -94.5721435546875, 44.414466857910156, 65.8060302734375, 15.476913452148438, -61.7315673828125, 18.090051651000977, 13.52627182006836, 1.1153278350830078, 29.711877822875977, 164.9337158203125, 103.74380493164062, 26.441314697265625, 4.930149078369141, -8.103763580322266, 176.31283569335938, 129.9278564453125, 98.12101745605469, 76.32437133789062, 17.389602661132812, -15.939903259277344, -11.93703842163086, 59.807186126708984, -41.29008483886719, -15.016510009765625, 9.61532974243164, 19.11534309387207, 5.82904052734375, 155.37130737304688, 52.49598693847656, 83.5357666015625, 109.58767700195312, 4.5007171630859375, 75.41921997070312, -7.068085670471191, 38.25730895996094, 34.99004364013672, 13.688289642333984, 12.19970703125, 76.75664520263672, -57.3524169921875, 71.83248901367188, -24.278427124023438, 39.56952667236328, 1.9475135803222656, 54.256797790527344, 34.42827224731445, 48.609859466552734, 43.64099884033203, 19.726526260375977, 19.636924743652344, 18.93033218383789, 43.233314514160156, 77.54350280761719, 2.1946544647216797, -4.384464263916016, 138.53329467773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000110.npy"}
|
|
{"epoch": 0.16152716593245228, "step": 111, "batch_size": 64, "mean": 42.30280303955078, "std": 57.86744689941406, "min": -81.54879760742188, "p10": -31.243550109863275, "median": 33.0509033203125, "p90": 114.88379058837891, "max": 189.19546508789062, "pos_frac": 0.765625, "sample": [189.19546508789062, -49.60833740234375, -81.54879760742188, 115.70333862304688, -7.380222320556641, 53.20487976074219, 12.846473693847656, 33.27002716064453, 26.59933090209961, 141.8157196044922, 31.617637634277344, 76.82403564453125, 42.526084899902344, 54.54083251953125, 27.798126220703125, 106.94366455078125, 109.8185806274414, -34.459747314453125, 97.66365051269531, 8.524980545043945, 77.51273345947266, -24.94915771484375, -22.102977752685547, -10.41168212890625, 64.48579406738281, 112.97151184082031, -6.050079345703125, 109.94297790527344, -33.94114685058594, 11.028617858886719, -22.845428466796875, 32.83177947998047, 129.45465087890625, 22.362930297851562, 144.93984985351562, 54.753631591796875, 30.359111785888672, 53.42318344116211, 66.23787689208984, 128.5244140625, 23.914459228515625, -41.78724670410156, 19.252788543701172, 26.98868179321289, 135.6851806640625, 68.78419494628906, 28.680023193359375, 57.13935852050781, 108.25798034667969, 2.3196964263916016, 109.81843566894531, 36.71337890625, 91.69222259521484, 69.02365112304688, 50.781532287597656, 13.32756233215332, -80.9703369140625, 31.485084533691406, 27.17033576965332, 40.36967468261719, -5.305706024169922, 90.83354187011719, -61.00807189941406, -10.211288452148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000111.npy"}
|
|
{"epoch": 0.16299559471365638, "step": 112, "batch_size": 64, "mean": 27.042510986328125, "std": 39.837196350097656, "min": -58.46979522705078, "p10": -24.0540771484375, "median": 25.093753814697266, "p90": 88.50456390380862, "max": 119.09957885742188, "pos_frac": 0.765625, "sample": [43.102115631103516, 119.09957885742188, 84.14384460449219, -5.904844284057617, -42.15517807006836, 32.640384674072266, 36.221561431884766, 29.38568115234375, 27.977928161621094, -5.969642639160156, 96.53790283203125, 78.94606018066406, 0.16957664489746094, -2.4005508422851562, 90.37344360351562, 60.28194808959961, 38.957759857177734, 55.074913024902344, 38.66630172729492, 35.512786865234375, 44.158294677734375, 32.20830535888672, 67.93315124511719, -14.675193786621094, 25.156448364257812, 21.920957565307617, 29.774681091308594, 31.77138900756836, -23.306129455566406, 17.381145477294922, 3.490732192993164, -33.175724029541016, 29.55518341064453, 106.36698150634766, 109.36446380615234, -31.550399780273438, 40.2030029296875, 15.063671112060547, 69.2110595703125, 0.5396709442138672, -58.46979522705078, 94.40007781982422, 24.48564910888672, 13.014778137207031, -11.184951782226562, 21.209644317626953, 61.699188232421875, 0.5886764526367188, 14.882743835449219, 96.10386657714844, 25.03105926513672, 9.140644073486328, 43.78784942626953, -40.681549072265625, 11.67170524597168, -24.37462615966797, -25.574424743652344, -10.302543640136719, 72.18505859375, 0.9461498260498047, 23.956586837768555, 5.245445251464844, -8.49078369140625, 39.39708709716797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000112.npy"}
|
|
{"epoch": 0.1644640234948605, "step": 113, "batch_size": 64, "mean": 29.26871109008789, "std": 37.84221267700195, "min": -39.223541259765625, "p10": -15.624301338195798, "median": 30.973909378051758, "p90": 81.2593894958496, "max": 116.9789047241211, "pos_frac": 0.734375, "sample": [-31.248046875, 74.07969665527344, -17.29705047607422, 87.2779541015625, 9.124191284179688, 37.88218688964844, 14.383920669555664, 36.349456787109375, 4.254243850708008, -8.934333801269531, 8.5811767578125, 83.22268676757812, 54.51631164550781, 108.43536376953125, 37.54319763183594, -18.122663497924805, -6.19061279296875, -8.438880920410156, 8.657447814941406, 65.94467163085938, -20.769729614257812, 47.00849914550781, 78.63824462890625, -21.50738525390625, 53.611961364746094, -7.341705322265625, 3.871358871459961, 116.9789047241211, 46.707054138183594, -39.223541259765625, 18.499671936035156, 74.34237670898438, 34.08088302612305, -11.014892578125, 35.80747985839844, 51.38391876220703, 13.737464904785156, 81.3922348022461, 19.888931274414062, -10.370758056640625, 51.49054718017578, 70.24627685546875, -11.721220016479492, 1.319234848022461, -26.138778686523438, 44.79397964477539, 35.17298126220703, 39.85040283203125, -9.897865295410156, 15.030462265014648, 111.54217529296875, -2.4377098083496094, 33.00356674194336, 32.79298400878906, 97.82955169677734, 80.94941711425781, 32.65266799926758, 62.01026916503906, -6.966501235961914, 11.262466430664062, 12.263275146484375, 15.16055679321289, 29.295150756835938, 47.951751708984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000113.npy"}
|
|
{"epoch": 0.16593245227606462, "step": 114, "batch_size": 64, "mean": 46.41866683959961, "std": 63.81900405883789, "min": -40.19544219970703, "p10": -26.334640502929688, "median": 42.48588562011719, "p90": 108.92114639282231, "max": 345.22332763671875, "pos_frac": 0.765625, "sample": [175.47457885742188, 89.7815933227539, 51.97412109375, 47.39861297607422, 26.39226531982422, 116.13877868652344, 19.412641525268555, 45.7530517578125, -37.340301513671875, 65.76534271240234, -30.824270248413086, 69.73184204101562, 44.423912048339844, 20.46508026123047, -37.50885772705078, 83.8590087890625, 14.447261810302734, -31.550804138183594, 33.5517578125, 120.24971008300781, 13.566003799438477, 54.37975311279297, -7.141845703125, 113.09525299072266, 40.35858917236328, 59.11016845703125, 34.07942199707031, 24.550739288330078, 37.94245910644531, 60.617549896240234, 63.83507537841797, -5.8095550537109375, -9.16729736328125, 56.287315368652344, -24.89471435546875, 15.258050918579102, 49.17717742919922, 68.13323974609375, 42.21647644042969, 99.18156433105469, 58.92233657836914, 87.86813354492188, 5.547904968261719, 49.70307922363281, 37.505313873291016, -12.045318603515625, 232.7224884033203, 6.414785385131836, 42.75529479980469, 139.67669677734375, -9.669927597045898, 71.54861450195312, -40.19544219970703, -26.951751708984375, 345.22332763671875, 56.97404861450195, -3.5288467407226562, 84.75015258789062, 33.81144714355469, -31.93661117553711, 26.18958282470703, 86.28089904785156, -17.447246551513672, 74.30500793457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000114.npy"}
|
|
{"epoch": 0.16740088105726872, "step": 115, "batch_size": 64, "mean": 32.142608642578125, "std": 64.82154083251953, "min": -149.059814453125, "p10": -27.917952728271484, "median": 19.263816833496094, "p90": 124.3146705627442, "max": 235.79763793945312, "pos_frac": 0.75, "sample": [0.5685615539550781, -2.8128280639648438, 130.42262268066406, 59.01904296875, -50.115631103515625, -0.38089752197265625, -45.125450134277344, 10.397706985473633, 13.026958465576172, 18.00251007080078, 25.12104034423828, -8.158218383789062, 42.84785842895508, 56.098304748535156, 10.052793502807617, 165.8162384033203, 21.326066970825195, -7.1557769775390625, 235.79763793945312, -11.25996208190918, 73.3510513305664, 7.5061492919921875, 137.34857177734375, 69.29875946044922, 169.51876831054688, -29.228057861328125, 22.959793090820312, 25.883487701416016, 7.98914909362793, 96.20433044433594, 172.5381317138672, 97.502197265625, 0.6962738037109375, -119.0702133178711, 9.221691131591797, 34.27350616455078, 57.79851531982422, 16.706035614013672, 9.19549560546875, 42.61400604248047, 27.535720825195312, 49.14842224121094, 25.878524780273438, 29.398460388183594, 140.52450561523438, -27.250396728515625, 21.219188690185547, 14.985015869140625, -8.238815307617188, 12.320610046386719, 20.525123596191406, 77.71673583984375, 3.1708450317382812, 15.206829071044922, -20.180782318115234, 110.06278228759766, 65.14149475097656, -15.810569763183594, 63.18345642089844, -149.059814453125, 106.572509765625, -47.290428161621094, 4.775320053100586, -28.20404815673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000115.npy"}
|
|
{"epoch": 0.16886930983847284, "step": 116, "batch_size": 64, "mean": 35.38352966308594, "std": 45.090545654296875, "min": -35.362083435058594, "p10": -15.227689552307128, "median": 30.305294036865234, "p90": 89.13259277343751, "max": 208.32237243652344, "pos_frac": 0.78125, "sample": [7.563470840454102, 3.5611610412597656, -32.37331771850586, -30.20110321044922, 25.598093032836914, 44.71342468261719, 60.4840087890625, 23.27397918701172, 47.57246398925781, 56.62689208984375, 29.370216369628906, -10.115358352661133, 41.11930847167969, 27.82665252685547, 7.975467681884766, -6.34208869934082, 53.70311737060547, -35.362083435058594, 81.39515686035156, 86.64601135253906, 39.036842346191406, -16.012969970703125, 22.723709106445312, 16.984947204589844, 48.07250213623047, 25.277503967285156, 73.92929077148438, 104.75979614257812, 32.048065185546875, -32.31194305419922, 119.66232299804688, 35.61064910888672, 22.40111541748047, 42.681976318359375, -13.395368576049805, -26.6180419921875, 6.500820159912109, 8.3065185546875, -2.1711597442626953, 98.15087890625, 5.270849227905273, 128.34193420410156, 31.240371704101562, 87.7708740234375, 79.14398193359375, 33.95980453491211, 55.109092712402344, -30.543167114257812, -8.207998275756836, 80.15241241455078, 23.974376678466797, -6.177515029907227, 208.32237243652344, 64.25285339355469, 11.9959716796875, 49.97285461425781, 67.22525024414062, 62.20280838012695, -10.485605239868164, 14.701271057128906, 1.0024871826171875, 40.88456726074219, 96.04701232910156, 89.7161865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000116.npy"}
|
|
{"epoch": 0.17033773861967694, "step": 117, "batch_size": 64, "mean": 28.79342269897461, "std": 50.358863830566406, "min": -98.87026977539062, "p10": -25.984368896484366, "median": 23.020792961120605, "p90": 85.40611572265627, "max": 183.33319091796875, "pos_frac": 0.75, "sample": [13.001102447509766, 53.21277618408203, 49.96458435058594, -15.637187957763672, 21.37879753112793, -2.6282119750976562, 19.21799659729004, 38.93675231933594, 20.393821716308594, 39.491600036621094, 42.86838912963867, 66.21492004394531, 130.419677734375, 111.9403076171875, 17.232017517089844, 45.16474914550781, -58.98857116699219, 87.40318298339844, 7.917022705078125, -95.8043212890625, -30.284912109375, 8.188629150390625, 15.7161865234375, 62.61274337768555, 43.69197463989258, -15.94976806640625, -31.560073852539062, 3.819478988647461, 24.66278839111328, -13.638175964355469, 38.355628967285156, 119.65565490722656, -4.211250305175781, -0.76898193359375, 145.89633178710938, 14.564208984375, 183.33319091796875, 15.419815063476562, 29.919822692871094, 62.52771759033203, 14.234813690185547, 8.651447296142578, 8.328969955444336, -9.277915954589844, -98.87026977539062, 30.340606689453125, 79.48307800292969, 31.246994018554688, 88.78496551513672, 37.36378479003906, -47.130340576171875, -9.502325057983398, 6.501125335693359, 54.7862663269043, 80.74629211425781, 78.79981994628906, 6.0871124267578125, 49.042755126953125, 69.64988708496094, 27.835540771484375, 43.302886962890625, -1.8251571655273438, -37.45878601074219, 68.00720977783203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000117.npy"}
|
|
{"epoch": 0.17180616740088106, "step": 118, "batch_size": 64, "mean": 35.48102569580078, "std": 45.978145599365234, "min": -54.203582763671875, "p10": -11.347178268432614, "median": 23.785919189453125, "p90": 108.28695220947269, "max": 155.19317626953125, "pos_frac": 0.78125, "sample": [-7.45147705078125, 21.063804626464844, -0.2848949432373047, -25.508071899414062, 19.62920379638672, 16.910167694091797, 60.19553756713867, 42.419471740722656, 47.86420440673828, 112.78955078125, 85.54011535644531, 23.531524658203125, 122.12503051757812, 30.55451202392578, -6.9435882568359375, 54.131309509277344, -16.156909942626953, -12.320144653320312, 32.93107223510742, 155.19317626953125, 120.52243041992188, -9.076923370361328, -54.203582763671875, 2.8790283203125, -38.83384704589844, 19.58254051208496, 2.100004196166992, 33.2315673828125, 13.790546417236328, 13.370254516601562, 11.604660034179688, 73.63341522216797, 2.5700836181640625, 74.68041229248047, 25.34778594970703, 48.76948547363281, 21.719642639160156, 40.270477294921875, 146.8447265625, -2.8382911682128906, 29.970001220703125, -17.987651824951172, -3.2522430419921875, 40.33045959472656, 142.97894287109375, 83.82117462158203, 111.45472717285156, 43.387420654296875, 77.76007843017578, 14.544729232788086, 1.074462890625, 66.31239318847656, 100.89547729492188, 69.80858612060547, 49.369468688964844, 24.040313720703125, 35.80571746826172, 17.161651611328125, 0.010175704956054688, 23.320419311523438, -21.91374969482422, 81.18154907226562, 3.1093292236328125, -4.575828552246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000118.npy"}
|
|
{"epoch": 0.17327459618208516, "step": 119, "batch_size": 64, "mean": 34.964866638183594, "std": 55.77119827270508, "min": -69.93711853027344, "p10": -30.386153411865234, "median": 35.01447868347168, "p90": 96.02911682128908, "max": 207.301025390625, "pos_frac": 0.671875, "sample": [13.827661514282227, 56.305511474609375, 38.215362548828125, 142.07208251953125, 39.6502685546875, -31.554977416992188, -8.039497375488281, 50.256622314453125, -5.573780059814453, 50.441139221191406, -9.300148010253906, -11.382667541503906, 207.301025390625, 48.612144470214844, 18.338729858398438, 121.33074951171875, 22.927234649658203, 9.993587493896484, 181.18296813964844, 48.06111145019531, 90.82337951660156, 73.72306823730469, 74.2200698852539, -5.346393585205078, 29.97917938232422, 34.529808044433594, -32.521995544433594, 18.241546630859375, 53.84953308105469, 56.0277099609375, -0.222015380859375, -4.448219299316406, 98.26014709472656, 36.88920211791992, 169.03964233398438, -4.3070526123046875, 64.6068344116211, 78.85726928710938, -2.352121353149414, 26.937461853027344, -55.29292297363281, 42.68274688720703, -57.5931396484375, 128.967041015625, 50.73919677734375, 65.79779815673828, -23.785919189453125, -30.796463012695312, 84.11617279052734, 35.499149322509766, -29.42876434326172, -69.93711853027344, -2.6151046752929688, 83.23567199707031, 17.370513916015625, 56.6740837097168, 49.997596740722656, 9.944513320922852, 65.76481628417969, -6.874094009399414, -42.89453887939453, 3.7375946044921875, 41.708404541015625, -18.71808624267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000119.npy"}
|
|
{"epoch": 0.17474302496328928, "step": 120, "batch_size": 64, "mean": 48.279685974121094, "std": 59.23860549926758, "min": -80.04412841796875, "p10": -19.45223960876464, "median": 38.3485107421875, "p90": 133.17235412597657, "max": 190.67474365234375, "pos_frac": 0.84375, "sample": [151.5937957763672, 84.99212646484375, 32.9530029296875, 110.66830444335938, 36.977359771728516, 10.088586807250977, -63.26861572265625, 99.52027893066406, 20.053546905517578, 73.2657241821289, 52.6875, 161.76712036132812, 79.74446868896484, 6.668464660644531, 17.44566535949707, 17.355438232421875, 135.03848266601562, 56.735443115234375, 69.15105438232422, -41.56682586669922, -40.78120422363281, 128.81805419921875, 40.107574462890625, 32.94575500488281, 42.21105194091797, 36.86936950683594, 47.11210632324219, 6.474773406982422, 160.03128051757812, -80.04412841796875, -23.190032958984375, 37.74946594238281, 24.20482635498047, 3.8371849060058594, 103.20256042480469, 9.35032844543457, 190.67474365234375, 53.57662582397461, 69.72830200195312, 3.395986557006836, 11.57375717163086, -5.601861953735352, 54.303382873535156, 182.42266845703125, 34.83433532714844, 43.90411376953125, 126.36116027832031, 172.009521484375, 38.94755554199219, 47.072669982910156, 0.33863067626953125, 87.3635025024414, -42.39158630371094, 18.556236267089844, 63.610084533691406, 36.176513671875, -2.764942169189453, 110.66671752929688, 74.51393127441406, -11.699146270751953, 75.25942993164062, -22.774993896484375, 36.188385009765625, 2.8841400146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000120.npy"}
|
|
{"epoch": 0.1762114537444934, "step": 121, "batch_size": 64, "mean": 53.492347717285156, "std": 69.47612762451172, "min": -147.92068481445312, "p10": -10.001323318481441, "median": 48.22388458251953, "p90": 149.72737426757814, "max": 215.3943328857422, "pos_frac": 0.78125, "sample": [8.579057693481445, -5.361457824707031, -1.762298583984375, 215.3943328857422, -6.449474334716797, 10.135932922363281, 94.50265502929688, -16.10687828063965, 77.89765167236328, 76.36898040771484, -0.061279296875, 62.217384338378906, 136.93592834472656, 107.77368927001953, 78.64398193359375, 0.04378509521484375, 88.31130981445312, 65.37849426269531, 53.240516662597656, -40.808876037597656, 36.84595489501953, 113.94789123535156, -59.056602478027344, 145.8870086669922, 167.3637237548828, 64.42304992675781, 33.570350646972656, 16.025257110595703, 111.0394287109375, 3.0889320373535156, 76.85639953613281, -21.46167755126953, 47.989776611328125, 83.22142028808594, 1.3088359832763672, 199.20745849609375, 4.889900207519531, 179.17520141601562, 45.096221923828125, 203.6339111328125, 9.261314392089844, 125.73896026611328, -11.523544311523438, 15.926422119140625, -25.766082763671875, 15.431632995605469, 73.89877319335938, 23.170806884765625, -6.1299591064453125, 55.042510986328125, 4.671548843383789, 19.358863830566406, 63.99098205566406, 130.0633544921875, 151.3732452392578, 70.72348022460938, -147.92068481445312, 56.372528076171875, -5.191802978515625, 76.67581176757812, 205.30029296875, 19.408254623413086, -2.750457763671875, 48.45799255371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000121.npy"}
|
|
{"epoch": 0.1776798825256975, "step": 122, "batch_size": 64, "mean": 66.51847839355469, "std": 65.45465850830078, "min": -89.48963928222656, "p10": 0.6307792663574229, "median": 51.97062683105469, "p90": 158.50116119384765, "max": 232.6812744140625, "pos_frac": 0.90625, "sample": [147.26353454589844, 21.74146270751953, 74.50577545166016, 80.92092895507812, 18.877058029174805, 26.95111083984375, 50.422821044921875, 95.0481948852539, 22.31008529663086, 114.14991760253906, 15.720836639404297, 128.04998779296875, 122.2857894897461, 232.6812744140625, 102.1299819946289, 34.43830108642578, 16.67131805419922, 56.03797149658203, 158.5195770263672, 48.90458679199219, 19.613401412963867, 140.0196533203125, 35.24557876586914, -10.219047546386719, 81.62562561035156, 12.037429809570312, 62.98876953125, 140.84872436523438, 74.62419891357422, 65.02352905273438, 74.42150115966797, -25.62993621826172, 170.82058715820312, -7.273839950561523, 52.95196533203125, 158.45819091796875, 62.84857177734375, 25.010337829589844, 171.93899536132812, 154.9817352294922, 9.845113754272461, 50.989288330078125, -34.011966705322266, 228.75535583496094, 18.89740753173828, 40.25577926635742, 31.355430603027344, 42.75746536254883, 25.070884704589844, -89.48963928222656, -7.3970489501953125, 27.640762329101562, 30.57774543762207, 67.12137603759766, 196.99237060546875, 1.5966224670410156, 114.55239868164062, 53.76142120361328, 0.21684646606445312, 13.852088928222656, 161.9410400390625, 74.64424133300781, 19.895309448242188, 149.3961181640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000122.npy"}
|
|
{"epoch": 0.17914831130690162, "step": 123, "batch_size": 64, "mean": 43.66535186767578, "std": 72.58432006835938, "min": -149.0946044921875, "p10": -25.914376640319823, "median": 22.062121391296387, "p90": 141.342221069336, "max": 232.0101318359375, "pos_frac": 0.671875, "sample": [77.86064910888672, 81.28756713867188, -4.355216979980469, 9.003898620605469, 88.48408508300781, 75.94435119628906, 117.9527587890625, 109.91635131835938, 78.90876007080078, 48.02960968017578, -0.4599952697753906, -20.62835693359375, 7.753009796142578, -39.903038024902344, 154.3282928466797, 85.12107849121094, 16.459575653076172, -12.908889770507812, -3.4599456787109375, 65.64547729492188, -5.582366943359375, 120.56692504882812, 64.68882751464844, -75.12178039550781, 21.427772521972656, 95.52398681640625, 212.4322509765625, 22.022132873535156, 150.04776000976562, 163.12646484375, -26.110986709594727, 62.798927307128906, 20.430431365966797, 8.658775329589844, 83.41788482666016, 121.029296875, 20.446151733398438, 47.73419189453125, 175.63429260253906, -20.924819946289062, 91.53128051757812, 84.12788391113281, -41.83488845825195, -66.40733337402344, 74.68095397949219, -7.7734375, -39.42961502075195, -4.532110214233398, -17.79187774658203, 232.0101318359375, -25.45561981201172, 68.4447021484375, 50.50145721435547, 204.12686157226562, -9.050430297851562, 22.102109909057617, 118.84306335449219, 34.80408477783203, -149.0946044921875, -15.401374816894531, 0.18337249755859375, -9.567272186279297, 0.3914318084716797, 1.9475536346435547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000123.npy"}
|
|
{"epoch": 0.18061674008810572, "step": 124, "batch_size": 64, "mean": 63.998435974121094, "std": 94.07720184326172, "min": -119.12956237792969, "p10": -26.179146194458006, "median": 41.06078338623047, "p90": 175.1367126464844, "max": 372.7012634277344, "pos_frac": 0.78125, "sample": [144.74769592285156, -25.517013549804688, 41.26829528808594, 32.93009948730469, 25.883007049560547, 148.1160125732422, -98.45211791992188, -9.209434509277344, 159.98907470703125, 185.93206787109375, -2.8145523071289062, 29.603641510009766, 4.553615570068359, 68.3023910522461, 76.27111053466797, 229.95986938476562, 154.96302795410156, 51.17820358276367, -18.887741088867188, 48.859840393066406, -119.12956237792969, -41.93201446533203, 139.41217041015625, -1.9359550476074219, 201.43316650390625, 128.47735595703125, 75.85931396484375, -5.264129638671875, 46.445709228515625, 118.68771362304688, -26.46291732788086, 33.77312088012695, 86.004150390625, 15.275468826293945, 11.308807373046875, 344.79986572265625, 100.45320129394531, 177.6453857421875, 29.148574829101562, 169.28314208984375, 103.60492706298828, -49.402366638183594, 16.456790924072266, -19.229843139648438, 39.01728057861328, 121.63018798828125, -100.01632690429688, 98.72506713867188, 20.68976402282715, 40.853271484375, 38.946537017822266, 75.20694732666016, -64.87773132324219, 248.55093383789062, 23.661346435546875, 372.7012634277344, 46.3978271484375, 165.05966186523438, 1.3960514068603516, 32.42432403564453, 52.15596008300781, 75.74846649169922, 14.182975769042969, 11.056732177734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000124.npy"}
|
|
{"epoch": 0.18208516886930984, "step": 125, "batch_size": 64, "mean": 57.12309646606445, "std": 76.90096282958984, "min": -88.54559326171875, "p10": -27.918128967285153, "median": 33.050578117370605, "p90": 162.3137603759766, "max": 249.46945190429688, "pos_frac": 0.78125, "sample": [-3.4563846588134766, 147.55577087402344, 89.96006774902344, 22.595367431640625, -12.869747161865234, 9.742755889892578, 111.20695495605469, -25.959579467773438, 140.22933959960938, 16.514638900756836, 10.723758697509766, -28.75750732421875, 24.44466209411621, 1.0699615478515625, 102.42084503173828, 108.15167236328125, 86.22364807128906, 53.98322296142578, 18.35262107849121, 4.904998779296875, 105.81422424316406, 18.57978057861328, -54.013702392578125, -13.102630615234375, 90.35135650634766, 249.46945190429688, -35.17326354980469, 31.088743209838867, 105.56912231445312, 35.42174530029297, -5.5638580322265625, -80.78047943115234, 35.012413024902344, 16.348875045776367, 30.524917602539062, 207.3389434814453, 6.288860321044922, 204.66021728515625, 120.28335571289062, -23.36774444580078, 11.700634002685547, 103.51644897460938, 186.00390625, 91.08399200439453, 226.95062255859375, 90.62046813964844, 55.51795196533203, 46.565242767333984, 166.42666625976562, 51.01831817626953, 11.842849731445312, 126.99229431152344, 41.708152770996094, -15.71412467956543, 108.35075378417969, 26.762020111083984, -41.37818908691406, 149.5689239501953, 30.67439079284668, -88.54559326171875, 207.15489196777344, 152.71697998046875, -33.287384033203125, 27.84044075012207], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000125.npy"}
|
|
{"epoch": 0.18355359765051396, "step": 126, "batch_size": 64, "mean": 53.1960334777832, "std": 70.9228286743164, "min": -77.41954803466797, "p10": -23.095580291748043, "median": 38.00962257385254, "p90": 156.21297149658204, "max": 245.445068359375, "pos_frac": 0.828125, "sample": [118.616943359375, 64.79252624511719, 12.185300827026367, 89.56005096435547, 13.133941650390625, 50.06450653076172, -37.78886413574219, 2.185688018798828, 58.89576721191406, 154.96371459960938, -50.80354309082031, 1.9590644836425781, 245.445068359375, 19.22516632080078, 84.37032318115234, 0.5326156616210938, 131.15719604492188, 41.753631591796875, 127.56396484375, 65.03825378417969, 44.72705841064453, 50.96043014526367, -8.222625732421875, 36.39847183227539, 64.52332305908203, 184.4442138671875, -19.700485229492188, 156.7483673095703, 32.567474365234375, 46.460540771484375, 20.651187896728516, 25.11547088623047, 71.87484741210938, 15.159408569335938, 177.42388916015625, 39.62077331542969, 57.533241271972656, -73.56925964355469, 145.95596313476562, -15.017337799072266, -59.38719177246094, -34.59523010253906, 233.45758056640625, -3.1742687225341797, 26.545507431030273, 9.991275787353516, 2.0450401306152344, 20.2042236328125, 121.14830017089844, 114.54144287109375, -77.41954803466797, 188.41075134277344, 83.86763000488281, 23.0760498046875, 35.22871398925781, 93.02436828613281, 30.999099731445312, 62.931556701660156, 17.115055084228516, -24.550621032714844, 13.557357788085938, 91.29786682128906, 12.416091918945312, 177.308837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000126.npy"}
|
|
{"epoch": 0.18502202643171806, "step": 127, "batch_size": 64, "mean": 39.201202392578125, "std": 61.68424606323242, "min": -123.87353515625, "p10": -34.150109100341794, "median": 40.75104904174805, "p90": 115.02199325561524, "max": 180.01454162597656, "pos_frac": 0.78125, "sample": [54.00200653076172, 79.53555297851562, -8.205238342285156, 37.601158142089844, 2.912353515625, -91.50936889648438, 149.1126708984375, 7.3769378662109375, 99.19518280029297, -123.87353515625, 8.651906967163086, 61.345069885253906, 74.55982971191406, -33.97406768798828, 18.357341766357422, 35.77482604980469, 92.4933090209961, 10.262151718139648, -120.54515075683594, 72.50056457519531, 24.63189697265625, 72.41822814941406, 125.28152465820312, 20.585098266601562, 67.71914672851562, 165.38015747070312, 20.598987579345703, 82.99273681640625, -18.868858337402344, 14.898231506347656, 120.00728607177734, 76.6742935180664, 107.00440979003906, 24.93598175048828, 51.30525207519531, 147.3843231201172, -2.0030784606933594, -18.40930938720703, 20.49215316772461, -1.3974990844726562, 16.660327911376953, 112.8416976928711, 9.129032135009766, 75.24889373779297, 46.32777404785156, 62.122642517089844, 44.98352813720703, 115.95640563964844, 107.5722427368164, 51.813804626464844, -44.29737854003906, 47.68107604980469, 19.372621536254883, 180.01454162597656, -59.55956268310547, 61.318634033203125, 45.076805114746094, 92.07560729980469, 43.90093994140625, 12.826480865478516, 32.86986541748047, -23.789588928222656, -34.248313903808594, -34.225555419921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000127.npy"}
|
|
{"epoch": 0.18649045521292218, "step": 128, "batch_size": 64, "mean": 41.13318634033203, "std": 72.4039077758789, "min": -167.17593383789062, "p10": -47.46497650146483, "median": 36.08229064941406, "p90": 142.5253662109375, "max": 247.1139678955078, "pos_frac": 0.703125, "sample": [46.87533187866211, 43.27452087402344, 131.6289520263672, -12.6650390625, 89.09992980957031, -58.98446273803711, 99.6068115234375, 142.35626220703125, 35.21929931640625, 42.66004943847656, 161.88424682617188, 50.6419677734375, 47.95170593261719, 34.86442565917969, -83.51541900634766, 97.69987487792969, 148.38284301757812, 4.652956008911133, 0.5356922149658203, 75.56640625, -70.23863220214844, -4.469856262207031, 247.1139678955078, -167.17593383789062, 145.40823364257812, 48.032066345214844, 161.23968505859375, 142.59783935546875, 65.4847640991211, 49.45661926269531, -9.346179962158203, -57.15806579589844, 102.6694564819336, -4.028776168823242, -15.864601135253906, 19.196517944335938, -76.86253356933594, -3.075542449951172, 32.671485900878906, 47.65726089477539, 85.71825408935547, -39.27391052246094, 54.747772216796875, -23.830886840820312, -7.916757583618164, 145.06187438964844, 31.727489471435547, 36.945281982421875, 22.14186668395996, 50.23838424682617, 23.85118865966797, -8.306524276733398, 123.94021606445312, -1.275482177734375, 12.790842056274414, 1.35504150390625, -10.583351135253906, 54.134483337402344, 101.49078369140625, 12.008752822875977, -50.975433349609375, 126.27015686035156, 139.99972534179688, 1.2201881408691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000128.npy"}
|
|
{"epoch": 0.18795888399412627, "step": 129, "batch_size": 64, "mean": 43.44950866699219, "std": 82.75946807861328, "min": -139.7572021484375, "p10": -41.58316268920898, "median": 30.585763931274414, "p90": 145.14446716308595, "max": 356.43365478515625, "pos_frac": 0.65625, "sample": [3.7278690338134766, 32.30472183227539, -45.38186264038086, 108.88298034667969, -46.67523193359375, 117.70443725585938, 11.070579528808594, -41.61909484863281, 166.58172607421875, 26.72425079345703, -10.698410034179688, 104.9825439453125, 25.52762222290039, 42.206329345703125, 56.790523529052734, 72.37760162353516, 146.31484985351562, -36.32959747314453, -32.447998046875, 54.58850860595703, 212.82275390625, 89.2205810546875, 216.13925170898438, -92.40204620361328, 87.72871398925781, -87.68701171875, -41.49932098388672, 39.497100830078125, 356.43365478515625, 142.38070678710938, 28.32685661315918, 16.821502685546875, -2.4170989990234375, 75.83228302001953, -10.483444213867188, -15.500621795654297, 28.866806030273438, 121.22671508789062, 28.397499084472656, -14.961837768554688, -70.78295135498047, -8.201452255249023, 46.39826965332031, 80.7315673828125, 147.03273010253906, -139.7572021484375, 15.359861373901367, 19.263629913330078, -7.836088180541992, -7.576961517333984, 131.96092224121094, 169.39012145996094, 38.484107971191406, 47.56964111328125, 69.65486145019531, -17.501007080078125, 41.16875457763672, 52.54936218261719, -37.52781677246094, 85.51129913330078, 142.41357421875, -28.784584045410156, 76.71546173095703, -0.8427524566650391], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000129.npy"}
|
|
{"epoch": 0.1894273127753304, "step": 130, "batch_size": 64, "mean": 65.76427459716797, "std": 83.48968505859375, "min": -105.32579040527344, "p10": -20.492866516113278, "median": 47.85641860961914, "p90": 191.8082946777344, "max": 321.20233154296875, "pos_frac": 0.78125, "sample": [71.42918395996094, 39.84413528442383, 29.98773956298828, 186.10763549804688, -16.71056365966797, 12.210922241210938, 200.15573120117188, 80.42820739746094, -50.229820251464844, -8.667654037475586, 15.002630233764648, 57.77762222290039, 39.45463180541992, 182.29232788085938, 201.19091796875, 21.842445373535156, 93.38880920410156, -8.124509811401367, 36.64381408691406, 71.54728698730469, -11.264179229736328, 31.578140258789062, 6.296379089355469, 85.03445434570312, 89.12271118164062, -3.1659011840820312, 33.44287109375, 88.43504333496094, 57.23114776611328, 178.8331298828125, 138.8023681640625, 210.09963989257812, -33.71576690673828, 27.662979125976562, 145.70790100097656, -105.32579040527344, 33.339935302734375, 29.0157470703125, 32.8410530090332, -22.113853454589844, 61.562217712402344, 52.165462493896484, 73.76405334472656, 241.7224578857422, 155.40304565429688, -8.044082641601562, 321.20233154296875, -38.66063690185547, -3.777677536010742, 87.2357406616211, 34.391761779785156, 43.5473747253418, 194.25143432617188, -30.325462341308594, 141.99407958984375, -105.09027862548828, 129.11473083496094, 2.2440414428710938, 119.37445068359375, 115.61705017089844, 207.946044921875, 55.73182678222656, 31.87314224243164, 58.24082946777344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000130.npy"}
|
|
{"epoch": 0.19089574155653452, "step": 131, "batch_size": 64, "mean": 65.16259002685547, "std": 71.26805877685547, "min": -145.072265625, "p10": -0.2870721817016598, "median": 48.97182655334473, "p90": 155.26929321289063, "max": 257.54010009765625, "pos_frac": 0.890625, "sample": [48.7824821472168, 44.471282958984375, 50.68714904785156, 29.479900360107422, 257.54010009765625, -145.072265625, 56.88762283325195, 37.473121643066406, 29.52788543701172, 102.26494598388672, 110.11441040039062, 21.091533660888672, 56.710723876953125, 12.992086410522461, 95.03819274902344, 35.105567932128906, 40.069950103759766, -1.900869369506836, 136.9400177001953, 70.97222900390625, 41.58116149902344, 153.82272338867188, -12.544143676757812, -12.198257446289062, 49.161170959472656, 42.540069580078125, 83.5887451171875, 9.03750228881836, 196.14215087890625, 38.73584747314453, 41.20155715942383, -81.56539916992188, 155.88925170898438, 9.060005187988281, 216.92031860351562, 3.8324317932128906, 95.8696517944336, 28.682174682617188, 136.5980987548828, 47.52933120727539, 123.88046264648438, 31.186384201049805, 199.66644287109375, 27.50281524658203, -88.79092407226562, 120.5947265625, 6.416290283203125, 105.9291763305664, 0.05748176574707031, 26.60123062133789, -0.4347381591796875, 134.3621063232422, 11.429359436035156, 173.3999786376953, 147.52737426757812, 159.7960205078125, 87.92289733886719, 64.71664428710938, 29.42410659790039, 78.91316223144531, 124.66144561767578, 93.46900177001953, 98.03944396972656, 81.07447814941406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000131.npy"}
|
|
{"epoch": 0.19236417033773862, "step": 132, "batch_size": 64, "mean": 53.85203552246094, "std": 85.3536148071289, "min": -153.46356201171875, "p10": -14.178560638427735, "median": 42.406246185302734, "p90": 142.90870208740236, "max": 366.81182861328125, "pos_frac": 0.8125, "sample": [15.871044158935547, 10.330951690673828, 145.91091918945312, -61.24366760253906, 66.46650695800781, 37.270042419433594, 63.19032669067383, 282.6272277832031, 305.09100341796875, 7.6708831787109375, 69.57095336914062, 107.75171661376953, 19.81086540222168, 74.48377990722656, -153.46356201171875, 5.7147979736328125, 215.362060546875, -34.37811279296875, 65.54935455322266, -13.704282760620117, 2.1501083374023438, -11.405021667480469, 125.6214370727539, 66.22483825683594, 20.37377166748047, -13.964302062988281, 0.7633419036865234, 111.09313201904297, 7.2829437255859375, 3.5635223388671875, -14.2703857421875, -10.78961181640625, 19.52910804748535, 91.28145599365234, 77.90242004394531, 145.035888671875, 47.542449951171875, 65.29843139648438, 75.03900146484375, 49.583797454833984, 52.5819091796875, 33.58098220825195, 30.631465911865234, 137.9452667236328, -101.17010498046875, 49.04188919067383, 114.3711166381836, -62.994544982910156, 13.314472198486328, 93.23252868652344, -18.01548194885254, 58.150657653808594, 104.07107543945312, 15.473648071289062, 56.760833740234375, -13.4661865234375, 33.142822265625, 102.39950561523438, 27.723648071289062, 12.068218231201172, 174.67120361328125, 26.727298736572266, 51.737342834472656, 366.81182861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000132.npy"}
|
|
{"epoch": 0.19383259911894274, "step": 133, "batch_size": 64, "mean": 47.43252944946289, "std": 74.29071807861328, "min": -102.72245788574219, "p10": -20.491673278808587, "median": 35.27021408081055, "p90": 143.267660522461, "max": 244.8885040283203, "pos_frac": 0.765625, "sample": [10.248565673828125, 69.98277282714844, -9.127883911132812, 107.87176513671875, 8.157947540283203, -39.317283630371094, -6.43011474609375, 35.657470703125, -73.03349304199219, 244.8885040283203, 50.95262145996094, 71.12860870361328, -5.260345458984375, 101.78752899169922, -75.31695556640625, 18.40185546875, 87.99868774414062, -7.96197509765625, 210.18846130371094, 6.44658088684082, 63.44165802001953, 80.189453125, -13.227317810058594, 129.62396240234375, 1.7835235595703125, 102.69363403320312, 46.860984802246094, 75.38627624511719, 83.14200592041016, 66.14956665039062, 10.2000732421875, 19.550636291503906, 25.216230392456055, 238.57278442382812, -1.285614013671875, 50.480438232421875, 70.21704864501953, -64.61236572265625, 206.59384155273438, 33.185150146484375, 42.0560417175293, 34.882957458496094, 52.418609619140625, 237.14703369140625, -102.72245788574219, -13.735107421875, 64.89570617675781, 173.60079956054688, 17.7730712890625, 48.1247673034668, 7.1030120849609375, 115.42245483398438, -52.842193603515625, 149.11495971679688, 98.13582611083984, -23.387344360351562, 64.95852661132812, 45.975311279296875, -9.613906860351562, 20.67424201965332, 7.618446350097656, 0.2924308776855469, 9.788928985595703, 16.574485778808594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000133.npy"}
|
|
{"epoch": 0.19530102790014683, "step": 134, "batch_size": 64, "mean": 50.91827392578125, "std": 66.5137939453125, "min": -43.87969207763672, "p10": -30.389616966247555, "median": 41.28227424621582, "p90": 129.29384536743166, "max": 324.4138488769531, "pos_frac": 0.828125, "sample": [15.81475830078125, -43.87969207763672, -36.12805938720703, 126.43214416503906, 197.09841918945312, 33.87244415283203, 18.38309097290039, 40.13528823852539, -31.909263610839844, 47.634849548339844, 119.52705383300781, 23.27829360961914, 12.030380249023438, 7.168556213378906, 51.119422912597656, 39.53492736816406, -26.843774795532227, 46.127349853515625, 77.15719604492188, 23.219505310058594, 5.74980354309082, 10.226280212402344, 97.35533905029297, 19.259315490722656, 162.37120056152344, 3.078216552734375, 108.04824829101562, 94.77308654785156, 51.765140533447266, 17.218124389648438, 47.64257049560547, 130.2677764892578, 2.3635311126708984, 47.42559051513672, 160.72866821289062, 112.79476165771484, 127.0213394165039, -40.28400421142578, 81.79974365234375, -17.139907836914062, 55.853729248046875, -34.07884216308594, 64.66082763671875, 6.912992477416992, 324.4138488769531, 11.685138702392578, 78.99464416503906, 39.806182861328125, 71.07736206054688, -4.603996276855469, 63.43780517578125, -23.195899963378906, -35.360694885253906, 176.16787719726562, 59.311561584472656, 6.4620361328125, 59.328758239746094, 39.085636138916016, 42.42926025390625, 71.75414276123047, -39.686561584472656, 78.68746948242188, 31.391193389892578, 151.99722290039062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000134.npy"}
|
|
{"epoch": 0.19676945668135096, "step": 135, "batch_size": 64, "mean": 73.2041015625, "std": 92.93647003173828, "min": -86.2076416015625, "p10": -34.68207778930663, "median": 52.88039016723633, "p90": 212.27519378662112, "max": 352.334228515625, "pos_frac": 0.8125, "sample": [28.506668090820312, 129.30755615234375, 31.056629180908203, 243.22393798828125, 123.29718017578125, 73.8278579711914, 244.3993682861328, 41.002803802490234, 239.40145874023438, 215.13243103027344, 128.096435546875, 86.28236389160156, 187.9637451171875, 126.19825744628906, 271.4989013671875, 115.16860961914062, 32.04754638671875, 195.50152587890625, 12.67142105102539, -37.21863555908203, 1.409811019897461, 111.95722961425781, 157.42227172851562, 77.62954711914062, 39.27131652832031, 97.38397216796875, 2.6647186279296875, 352.334228515625, -22.683704376220703, 108.69610595703125, -58.53497314453125, 200.42013549804688, 205.60830688476562, -73.78630065917969, 40.00053024291992, -16.826595306396484, 27.315696716308594, 55.99645233154297, 124.73989868164062, -1.37982177734375, -57.93870544433594, 52.801605224609375, 82.57479858398438, 55.93927001953125, 1.3987159729003906, 52.95917510986328, 4.402858734130859, -13.982803344726562, 47.16035461425781, 16.77370834350586, 33.73876953125, 110.44157409667969, 91.41560363769531, 86.5749740600586, 6.63726806640625, 97.13453674316406, 44.48826599121094, 228.43850708007812, 13.204338073730469, -86.2076416015625, -28.763442993164062, -37.650909423828125, -41.26713562011719, 7.784111022949219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000135.npy"}
|
|
{"epoch": 0.19823788546255505, "step": 136, "batch_size": 64, "mean": 48.954044342041016, "std": 91.8631591796875, "min": -73.37477111816406, "p10": -45.74725914001465, "median": 28.49736976623535, "p90": 121.53847579956056, "max": 407.9271240234375, "pos_frac": 0.6875, "sample": [61.94028854370117, 71.2911605834961, 103.3582763671875, 93.95970153808594, -6.632789611816406, -48.86715316772461, 295.301025390625, 130.65235900878906, -18.669158935546875, 66.39047241210938, 101.14225769042969, 65.28919982910156, -21.897323608398438, 13.071399688720703, 75.56541442871094, 54.33355712890625, 80.41458129882812, 34.68449401855469, 113.4661865234375, 318.4679870605469, -2.1588134765625, 72.50215148925781, 407.9271240234375, 3.8148651123046875, 115.97169494628906, -3.9505233764648438, 221.34307861328125, -7.569911956787109, 258.4736328125, 23.93436050415039, 9.677635192871094, 3.041790008544922, 58.208580017089844, -70.63323974609375, 52.61967468261719, -64.39022827148438, 21.063949584960938, -24.653160095214844, 88.85484313964844, 88.42623901367188, 11.584161758422852, -15.841472625732422, -14.062759399414062, 62.66218566894531, 9.171916961669922, 1.3610210418701172, -24.827228546142578, 32.59392166137695, 92.1588134765625, 119.37146759033203, -46.357521057128906, -60.123985290527344, 35.79779052734375, 77.57820892333984, -73.37477111816406, 2.281951904296875, 69.11485290527344, -22.363445281982422, -44.32331466674805, 24.40081787109375, -4.118568420410156, 122.46719360351562, 3.2088088989257812, -61.06703186035156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000136.npy"}
|
|
{"epoch": 0.19970631424375918, "step": 137, "batch_size": 64, "mean": 76.9243392944336, "std": 112.35306549072266, "min": -155.94601440429688, "p10": -26.437416076660153, "median": 49.164344787597656, "p90": 249.27919616699245, "max": 384.16986083984375, "pos_frac": 0.84375, "sample": [-43.00120544433594, 11.007831573486328, 71.79203796386719, 119.37999725341797, 151.95242309570312, 32.40300750732422, 47.935150146484375, 89.71243286132812, -22.38741683959961, 102.29389953613281, 3.584930419921875, 95.68246459960938, 384.16986083984375, 18.493467330932617, 38.521453857421875, -111.41058349609375, 112.82551574707031, 12.295673370361328, 35.69537353515625, 150.2581024169922, 155.2628173828125, 80.21126556396484, 3.009632110595703, 383.29754638671875, 112.15388488769531, 324.77880859375, 188.3812255859375, 123.37745666503906, 19.4357852935791, 167.58071899414062, -91.39701843261719, 11.017913818359375, 67.40196990966797, 11.618415832519531, 133.69955444335938, 9.542205810546875, 124.75993347167969, 50.37443542480469, 7.665596008300781, 48.91688537597656, 63.88481903076172, 42.71709442138672, -3.0038986206054688, -155.94601440429688, -44.22278594970703, -72.55867767333984, 362.611083984375, -28.17313003540039, 39.485130310058594, 295.9627990722656, 54.17768096923828, 129.26577758789062, 3.520477294921875, 90.76055908203125, -16.560365676879883, 28.69383430480957, 49.41180419921875, 9.246082305908203, 0.6995639801025391, 102.19324493408203, 303.87432861328125, 35.35050964355469, 124.09598541259766, 275.3783264160156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000137.npy"}
|
|
{"epoch": 0.2011747430249633, "step": 138, "batch_size": 64, "mean": 58.114654541015625, "std": 71.87110137939453, "min": -114.43649291992188, "p10": -17.50610809326172, "median": 42.20337104797363, "p90": 151.28023071289067, "max": 275.4041748046875, "pos_frac": 0.78125, "sample": [156.1363525390625, -14.487762451171875, 71.55155944824219, 138.98941040039062, 9.112152099609375, 114.52545928955078, -17.668197631835938, -1.123931884765625, 7.3747406005859375, 172.4095916748047, 31.143062591552734, 18.524063110351562, 41.35221862792969, 11.362695693969727, 26.55531120300293, -114.43649291992188, 16.207372665405273, -20.714717864990234, -0.17459869384765625, -13.586463928222656, 185.5174560546875, 174.2708740234375, 15.541706085205078, 10.225196838378906, 116.08380126953125, 130.1786346435547, 130.53598022460938, -4.019325256347656, 91.07793426513672, 136.15318298339844, 56.88848876953125, 29.19875144958496, 124.20149230957031, 6.8475341796875, 136.55441284179688, 169.58177185058594, 74.8636474609375, 43.05452346801758, -68.73858642578125, 61.369529724121094, 64.06232452392578, 28.60125732421875, 86.16136169433594, 0.9622859954833984, -6.312034606933594, -55.92393493652344, 95.91546630859375, 139.94927978515625, 15.77145004272461, 40.44361114501953, 111.1055908203125, 33.79503631591797, 136.05064392089844, -33.36548614501953, -19.73171043395996, 58.57278823852539, -17.127899169921875, 70.16513061523438, 19.738616943359375, 275.4041748046875, 103.41582489013672, 56.52326965332031, 106.25089263916016, 156.47119140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000138.npy"}
|
|
{"epoch": 0.2026431718061674, "step": 139, "batch_size": 64, "mean": 69.69491577148438, "std": 86.99911499023438, "min": -109.46734619140625, "p10": -29.612622070312494, "median": 55.30386734008789, "p90": 167.07206878662114, "max": 369.9289245605469, "pos_frac": 0.796875, "sample": [-19.46373748779297, 155.17654418945312, 99.50556182861328, 173.67893981933594, -32.504173278808594, 64.87044525146484, 121.59984588623047, -12.747291564941406, 82.69035339355469, 143.27838134765625, -109.46734619140625, 57.327178955078125, -49.54243087768555, 272.598876953125, 139.83544921875, 46.622154235839844, 86.25333404541016, 53.280555725097656, 34.534141540527344, 62.204708099365234, 143.291015625, 73.20791625976562, -70.05695343017578, 125.63509368896484, 50.80836486816406, 1.3875808715820312, 154.2473907470703, 82.74406433105469, 15.784097671508789, 38.647499084472656, 151.72235107421875, 23.180299758911133, 125.94572448730469, 116.01268768310547, 37.38750457763672, 90.66062927246094, -22.22357940673828, -1.0368938446044922, 9.249870300292969, 15.843727111816406, 80.16492462158203, -0.371978759765625, 16.88744354248047, 150.18429565429688, 22.98516845703125, 263.0988464355469, 90.51812744140625, -22.86566925048828, 76.69994354248047, -46.05303192138672, 82.9174575805664, -46.9595947265625, 34.14586639404297, 84.23963928222656, 369.9289245605469, 39.06660842895508, 252.3577117919922, 39.261688232421875, 45.39124298095703, -38.922882080078125, 191.31427001953125, 29.1539306640625, 42.99192810058594, 172.17015075683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000139.npy"}
|
|
{"epoch": 0.20411160058737152, "step": 140, "batch_size": 64, "mean": 63.4018440246582, "std": 89.44715118408203, "min": -128.67735290527344, "p10": -51.1243595123291, "median": 64.66999435424805, "p90": 171.37539672851565, "max": 346.57183837890625, "pos_frac": 0.734375, "sample": [-26.75271987915039, 122.87307739257812, 346.57183837890625, -74.75318145751953, -13.551933288574219, 123.36424255371094, 85.55101776123047, 146.6265411376953, 56.10523223876953, 162.3393096923828, 118.36004638671875, 72.0396957397461, 113.58232116699219, 40.964759826660156, 23.648452758789062, 84.26991271972656, 272.4944152832031, 11.42165756225586, 6.435932159423828, 75.08983612060547, 19.918533325195312, 65.15484619140625, 87.61383056640625, 137.81192016601562, -2.0508193969726562, -55.994964599609375, 254.29701232910156, -53.61273193359375, 36.464202880859375, 79.02386474609375, 21.007814407348633, 64.18514251708984, 8.406171798706055, 115.04237365722656, -32.78474426269531, 61.049896240234375, 93.00254821777344, 67.15621185302734, -8.335662841796875, 164.89727783203125, 130.49087524414062, -33.219512939453125, 175.3437042236328, -128.67735290527344, -1.5329132080078125, 62.86962127685547, 62.24098205566406, 91.6654052734375, 187.8402099609375, 23.65520477294922, -7.069269180297852, 154.3893280029297, -12.36515998840332, 19.898056030273438, -88.24607849121094, -52.4589729309082, -72.5126724243164, 65.73582458496094, 179.94729614257812, 174.1517333984375, -48.01026153564453, 84.46949768066406, 82.95242309570312, 137.2269287109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000140.npy"}
|
|
{"epoch": 0.2055800293685756, "step": 141, "batch_size": 64, "mean": 59.664695739746094, "std": 95.8642807006836, "min": -245.87655639648438, "p10": -48.8665557861328, "median": 59.9300594329834, "p90": 183.569808959961, "max": 293.49139404296875, "pos_frac": 0.75, "sample": [61.801605224609375, 38.592750549316406, -16.72921371459961, 101.13851165771484, 79.79246520996094, -33.61730194091797, 108.12677001953125, 59.42173767089844, 1.6284713745117188, -7.613285064697266, 60.43838119506836, 263.74835205078125, -245.87655639648438, 48.451210021972656, -4.794624328613281, 34.30303955078125, -0.5732936859130859, -56.0010986328125, 264.6234130859375, -63.81306457519531, -38.39728546142578, 198.8325958251953, 48.16616439819336, 84.166748046875, 147.30093383789062, 80.78778076171875, 107.57003784179688, 3.0399856567382812, -62.854461669921875, 73.32330322265625, 111.42962646484375, 78.69795227050781, 34.69287872314453, 170.91061401367188, 247.69906616210938, 25.488725662231445, 117.35466766357422, 25.07213592529297, -19.841432571411133, 16.71316909790039, -25.26258087158203, 50.561988830566406, -55.08588409423828, 188.99517822265625, 5.123270034790039, 136.19012451171875, 190.53857421875, 119.37167358398438, 11.684009552001953, -165.80072021484375, 18.872512817382812, 97.6536865234375, 63.17939758300781, 89.36070251464844, 113.70736694335938, 129.09127807617188, 293.49139404296875, 129.19735717773438, -0.6419467926025391, 160.02037048339844, 69.39237976074219, 104.93350219726562, -53.35338592529297, 4.1188201904296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000141.npy"}
|
|
{"epoch": 0.20704845814977973, "step": 142, "batch_size": 64, "mean": 68.77471923828125, "std": 108.09823608398438, "min": -212.98211669921875, "p10": -47.50071716308592, "median": 35.54212188720703, "p90": 235.541943359375, "max": 297.48779296875, "pos_frac": 0.765625, "sample": [-22.065391540527344, 264.2941589355469, 33.245033264160156, 41.91747283935547, -30.657371520996094, 246.0963897705078, -32.91093063354492, -21.951047897338867, -92.27911376953125, 32.01002883911133, 139.39566040039062, 97.08877563476562, 159.83026123046875, 30.085390090942383, 34.859947204589844, 230.7392120361328, 16.332263946533203, -212.98211669921875, -156.27053833007812, 73.32886505126953, 18.51146697998047, 34.14289855957031, 18.22470474243164, 297.48779296875, 165.0576934814453, 36.22429656982422, 148.4558868408203, 26.005712509155273, 146.60467529296875, -21.7854061126709, 105.52362060546875, 119.07063293457031, 234.7266082763672, 82.36729431152344, -0.8978443145751953, 206.69004821777344, 216.55979919433594, 18.205032348632812, 20.543615341186523, 42.534393310546875, -67.05801391601562, -53.753482818603516, 3.796588897705078, 235.89137268066406, 67.33252716064453, 244.47215270996094, 14.109661102294922, 23.544151306152344, 72.5848388671875, -57.64079284667969, 57.14628982543945, -17.807662963867188, 7.106096267700195, -82.77850341796875, 191.5092010498047, 242.6427001953125, 24.30156707763672, 189.60000610351562, 25.662307739257812, -11.355031967163086, 125.94478607177734, 250.46560668945312, 45.36112976074219, 126.1447525024414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000142.npy"}
|
|
{"epoch": 0.20851688693098386, "step": 143, "batch_size": 64, "mean": 43.79881286621094, "std": 94.29562377929688, "min": -156.56546020507812, "p10": -55.83059616088867, "median": 28.435501098632812, "p90": 144.1503875732422, "max": 332.3783874511719, "pos_frac": 0.65625, "sample": [84.56964111328125, -7.206268310546875, 53.86438751220703, 25.645912170410156, 284.75872802734375, 90.86522674560547, -56.628013610839844, 96.80835723876953, 106.17286682128906, 23.81292152404785, 144.55218505859375, -87.57260131835938, -53.96995544433594, 137.487548828125, 85.04375457763672, -123.38062286376953, -47.8173828125, -2.799745559692383, 103.85987854003906, -6.0804901123046875, 33.761268615722656, 259.19305419921875, -68.58541107177734, 98.25881958007812, 16.777305603027344, -16.626800537109375, 46.92082214355469, 143.21286010742188, 29.248634338378906, 14.608501434326172, -12.692398071289062, -49.101531982421875, -60.39521789550781, 3.969247817993164, 134.66993713378906, 5.686920166015625, 44.347084045410156, 67.04415893554688, 25.6136474609375, 332.3783874511719, -35.686912536621094, -12.813758850097656, 34.35393524169922, 83.02519226074219, -2.6446285247802734, 54.567718505859375, 155.43637084960938, 136.28463745117188, -5.493915557861328, -52.20849609375, -88.80523681640625, 287.10888671875, -34.385841369628906, 79.9761734008789, 59.23143005371094, 29.974138259887695, 110.04443359375, 27.62236785888672, 180.3021240234375, -156.56546020507812, -9.181846618652344, 42.474700927734375, 4.188636779785156, 16.043670654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000143.npy"}
|
|
{"epoch": 0.20998531571218795, "step": 144, "batch_size": 64, "mean": 53.788116455078125, "std": 78.23200225830078, "min": -106.958251953125, "p10": -33.41476974487305, "median": 43.817298889160156, "p90": 169.40383605957032, "max": 242.93838500976562, "pos_frac": 0.734375, "sample": [-33.533905029296875, 110.55303192138672, -4.753889083862305, 171.93350219726562, -101.8635025024414, -19.873153686523438, 67.63292694091797, 118.31734466552734, 43.276268005371094, 14.397686004638672, 63.43629455566406, -41.58500289916992, 74.16607666015625, 104.13070678710938, 57.695556640625, 163.50128173828125, 50.588539123535156, 200.17318725585938, 104.48700714111328, 141.44659423828125, 173.05233764648438, 191.44241333007812, -23.90410041809082, -106.958251953125, 102.8016586303711, -25.244781494140625, 49.18018341064453, -3.5575790405273438, 68.89425659179688, 23.137388229370117, -54.265403747558594, 75.09275817871094, 65.62272644042969, 15.5711669921875, -48.27423858642578, 139.04531860351562, 46.76128387451172, 27.281002044677734, 24.688894271850586, 89.37387084960938, 30.690460205078125, 93.85089874267578, 242.93838500976562, 27.087270736694336, 27.113327026367188, 75.70999145507812, -33.13678741455078, 23.15106964111328, 21.283462524414062, 101.01654052734375, 120.49102020263672, 229.2256317138672, -1.6124267578125, -26.839813232421875, -17.092239379882812, -22.85608673095703, 219.27659606933594, 15.587406158447266, 22.963415145874023, 0.7411575317382812, 44.35832977294922, 29.343338012695312, 149.73477172851562, -44.453773498535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000144.npy"}
|
|
{"epoch": 0.21145374449339208, "step": 145, "batch_size": 64, "mean": 57.771766662597656, "std": 80.50733184814453, "min": -119.71025085449219, "p10": -41.77843666076659, "median": 44.88104248046875, "p90": 148.09603881835938, "max": 263.5973205566406, "pos_frac": 0.75, "sample": [24.426055908203125, -46.12120056152344, -97.0699462890625, 263.5973205566406, 147.971923828125, 113.62303924560547, 243.03488159179688, 2.8041648864746094, -119.71025085449219, 57.97629928588867, 4.9766845703125, 78.83944702148438, 143.1132049560547, -54.53861618041992, 11.500232696533203, 120.44918060302734, 55.32337188720703, 40.98395538330078, 119.88749694824219, -48.270790100097656, -8.361852645874023, 36.876190185546875, 43.3001594543457, 134.52197265625, 91.34233093261719, -65.9007568359375, -9.93377685546875, 29.381702423095703, -11.111465454101562, 180.87567138671875, 22.898801803588867, 12.76251220703125, 112.04373168945312, -31.645320892333984, -56.70745849609375, 119.24983215332031, 16.326904296875, 22.39307975769043, 78.33853912353516, -9.809051513671875, 72.50367736816406, -6.151111602783203, 116.21361541748047, -15.27829360961914, 114.58151245117188, 191.28392028808594, 103.71049499511719, -21.831680297851562, 51.70820236206055, 124.99840545654297, 30.779468536376953, 40.93655014038086, 141.48191833496094, 155.77609252929688, 59.798152923583984, 18.244686126708984, -5.821891784667969, 65.54853820800781, 133.620361328125, 118.15935516357422, 231.02938842773438, 46.4619255065918, 11.852462768554688, 148.14923095703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000145.npy"}
|
|
{"epoch": 0.21292217327459617, "step": 146, "batch_size": 64, "mean": 44.597129821777344, "std": 88.64009857177734, "min": -239.86172485351562, "p10": -71.05649948120117, "median": 58.54458045959473, "p90": 156.06778717041018, "max": 243.79176330566406, "pos_frac": 0.671875, "sample": [33.64196014404297, -33.665496826171875, 107.54576110839844, -64.17914581298828, 18.961732864379883, 2.8320388793945312, 73.38711547851562, 63.02696228027344, 76.24275207519531, 64.20836639404297, 214.23138427734375, 95.31578826904297, 43.38665771484375, -8.426483154296875, 24.648643493652344, -6.951271057128906, 93.37494659423828, -9.468948364257812, 39.584922790527344, -18.214494705200195, -5.882175445556641, 30.77911376953125, 60.42420196533203, 116.61811828613281, -101.93742370605469, 70.36962127685547, 150.28314208984375, -239.86172485351562, -74.00393676757812, 208.1759033203125, -79.61194610595703, 73.97654724121094, -3.9143333435058594, 73.001708984375, 186.94895935058594, -79.8497543334961, 49.09098815917969, 60.341041564941406, 118.8978500366211, 243.79176330566406, 5.379417419433594, -5.9987640380859375, 58.244876861572266, 79.63087463378906, 58.84428405761719, 103.6448745727539, 103.00223541259766, -100.92442321777344, 59.11845397949219, -75.97003173828125, 81.41143798828125, 139.13442993164062, 158.5469207763672, 74.31758117675781, -8.988555908203125, 92.13054656982422, 35.73612976074219, -35.80867004394531, 85.14054870605469, -53.541656494140625, 228.72264099121094, -32.734527587890625, 188.9276123046875, -50.870758056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000146.npy"}
|
|
{"epoch": 0.2143906020558003, "step": 147, "batch_size": 64, "mean": 60.34022521972656, "std": 90.45318603515625, "min": -148.34124755859375, "p10": -31.65120162963867, "median": 46.38974380493164, "p90": 193.26815185546877, "max": 299.40435791015625, "pos_frac": 0.75, "sample": [108.49249267578125, -62.97639846801758, 42.04887390136719, 230.51303100585938, -28.752769470214844, -8.794197082519531, 97.4781494140625, 97.06840515136719, 50.78166198730469, -43.708656311035156, 28.69838523864746, 74.56834411621094, 58.92033386230469, 123.45657348632812, 23.865352630615234, 195.12689208984375, 112.65422058105469, 19.691680908203125, -7.455963134765625, 188.93109130859375, 161.30203247070312, -23.677467346191406, 174.6533660888672, 21.795740127563477, 270.9537353515625, 46.53339385986328, 196.95559692382812, 105.45709228515625, -11.799982070922852, 46.24609375, 29.45229721069336, -130.47283935546875, 3.9680938720703125, 114.5360107421875, 100.6793212890625, -27.494430541992188, 61.34150695800781, 299.40435791015625, 76.29334259033203, 20.399860382080078, 176.00357055664062, 14.335784912109375, -15.444211959838867, 14.190544128417969, 33.60981750488281, 59.327980041503906, -12.235366821289062, 79.85391235351562, 64.92311096191406, -32.89338684082031, 21.3265380859375, 49.38984298706055, 19.40017318725586, 173.00071716308594, -25.878921508789062, 57.92104721069336, 242.3570556640625, -33.21513366699219, 30.765594482421875, -148.34124755859375, 212.70001220703125, -59.1842041015625, 58.5601806640625, 44.16633224487305], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000147.npy"}
|
|
{"epoch": 0.21585903083700442, "step": 148, "batch_size": 64, "mean": 51.533233642578125, "std": 82.40593719482422, "min": -102.71951293945312, "p10": -29.22577972412109, "median": 34.79726791381836, "p90": 143.45980224609377, "max": 336.9862060546875, "pos_frac": 0.796875, "sample": [-6.596851348876953, 50.438629150390625, 5.10784912109375, 83.48876953125, -56.879478454589844, 97.61302947998047, 80.36970520019531, 39.283382415771484, -42.05071258544922, 187.87095642089844, 18.39844512939453, 140.86996459960938, 22.312623977661133, -16.32274627685547, 152.90260314941406, 251.6890869140625, 2.6171951293945312, 15.2822265625, -39.733787536621094, -72.98110961914062, 133.6666717529297, 129.78358459472656, -10.419416427612305, 91.97549438476562, 94.62904357910156, 40.9617919921875, 63.72491455078125, 50.406917572021484, 106.41477966308594, 1.2786674499511719, 144.56973266601562, 31.3990478515625, 36.87962341308594, 67.12336730957031, -30.927635192871094, 45.89542770385742, 30.50110626220703, 17.087539672851562, 111.49150848388672, 54.56257247924805, 133.68338012695312, 33.901954650878906, -81.78868103027344, 48.15281677246094, 35.69258117675781, 22.979759216308594, 182.150634765625, 47.228050231933594, -4.7233734130859375, -25.254783630371094, -102.71951293945312, 41.167205810546875, 9.775558471679688, 6.356992721557617, 33.09064483642578, 6.851018905639648, 33.265403747558594, 336.9862060546875, 318.42083740234375, 7.689840316772461, -2.9343204498291016, 1.4414443969726562, 86.43403625488281, 5.594758987426758], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000148.npy"}
|
|
{"epoch": 0.2173274596182085, "step": 149, "batch_size": 64, "mean": 64.60050964355469, "std": 81.1431884765625, "min": -182.0203399658203, "p10": -8.874083328247067, "median": 56.97078895568848, "p90": 150.89680023193358, "max": 306.36065673828125, "pos_frac": 0.828125, "sample": [110.35436248779297, 65.56507873535156, 203.42080688476562, 60.6282844543457, 105.12516021728516, 68.34854125976562, 73.25789642333984, -13.538259506225586, -18.998634338378906, -9.79708480834961, 21.957664489746094, -6.7204132080078125, 58.306331634521484, 44.24468231201172, 60.5096435546875, -63.977752685546875, 89.76043701171875, 93.9150390625, 46.52105712890625, 133.40066528320312, 109.125244140625, 172.18917846679688, 101.77281951904297, 28.47931480407715, 14.669469833374023, 97.73321533203125, 50.32769012451172, 124.83208465576172, 72.05204772949219, 129.1788330078125, 63.77747344970703, -1.1791820526123047, 306.36065673828125, 36.957862854003906, 150.746826171875, 26.799545288085938, 225.8013916015625, 66.31311798095703, 29.372299194335938, 106.45000457763672, 24.03921127319336, 75.3624267578125, 11.476289749145508, 44.799720764160156, -0.7868270874023438, 54.18896484375, 14.111148834228516, 55.63524627685547, -13.803436279296875, -182.0203399658203, 97.85342407226562, 15.311897277832031, 1.568695068359375, 150.96107482910156, 11.196004867553711, 274.632568359375, 17.142017364501953, 42.12152099609375, 140.80364990234375, 263.2383728027344, 8.306068420410156, -6.560604095458984, 85.66371154785156, -84.85136413574219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000149.npy"}
|
|
{"epoch": 0.21879588839941264, "step": 150, "batch_size": 64, "mean": 63.096435546875, "std": 93.77081298828125, "min": -82.7943115234375, "p10": -23.533736419677727, "median": 36.44698143005371, "p90": 168.97206878662112, "max": 409.53924560546875, "pos_frac": 0.828125, "sample": [287.36883544921875, 324.929931640625, 17.71851348876953, 31.326568603515625, 60.609527587890625, -34.86304473876953, 178.753173828125, 35.33976745605469, -82.2635726928711, 68.94361114501953, 409.53924560546875, -5.842657089233398, -15.165283203125, 17.989105224609375, -8.28656005859375, 132.1071014404297, 19.965835571289062, 51.131019592285156, 47.40095520019531, 74.33753204345703, 37.554195404052734, 55.449241638183594, 150.3697509765625, 26.964202880859375, 107.29396057128906, 89.01813507080078, 190.1161346435547, 130.07244873046875, 17.03722381591797, 69.04753112792969, 13.358451843261719, -46.60319519042969, 84.29108428955078, 171.60679626464844, 69.37664794921875, 8.083084106445312, 162.82437133789062, 21.676862716674805, 141.95428466796875, -27.120216369628906, 93.73463439941406, -37.88715362548828, -39.860862731933594, 6.0289306640625, 116.7437515258789, 4.290470123291016, 27.545005798339844, 63.7164306640625, 17.335914611816406, 39.28260803222656, 54.64599609375, 67.47276306152344, 63.786277770996094, 18.872634887695312, 34.81627655029297, 349.44647216796875, 14.374662399291992, 16.60365867614746, 10.378206253051758, 32.544097900390625, 93.69597625732422, -10.062370300292969, -82.7943115234375, 0.051052093505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000150.npy"}
|
|
{"epoch": 0.22026431718061673, "step": 151, "batch_size": 64, "mean": 62.93691635131836, "std": 78.74150848388672, "min": -146.86474609375, "p10": -27.229729080200194, "median": 54.71632385253906, "p90": 176.61609344482423, "max": 243.765625, "pos_frac": 0.828125, "sample": [-146.86474609375, 218.2666778564453, 45.06510925292969, -1.8977890014648438, 41.49696350097656, 54.91795349121094, 69.00609588623047, 111.95790100097656, 74.49870300292969, 26.6239013671875, 177.35354614257812, -42.476646423339844, -27.934837341308594, 101.13865661621094, 54.51469421386719, -43.44021987915039, -7.853580474853516, 41.292564392089844, 56.898712158203125, 46.502174377441406, 233.08502197265625, 57.45957565307617, 143.52493286132812, 58.08013153076172, -54.12494659423828, 172.69174194335938, 24.754928588867188, 5.547943115234375, -25.584476470947266, 21.0740966796875, 38.11225128173828, 11.581151962280273, 19.72146224975586, 8.59461784362793, 103.26161193847656, 174.89537048339844, -64.61128234863281, 64.14280700683594, 101.21269989013672, 100.7218017578125, 7.960878372192383, 58.819610595703125, 96.71614074707031, 78.73910522460938, 11.363189697265625, 190.94888305664062, 104.06336975097656, 53.28559112548828, 118.73731994628906, -1.8123397827148438, 208.12791442871094, 9.982879638671875, 2.9581222534179688, 138.14773559570312, 15.336135864257812, 14.224206924438477, 200.35272216796875, 105.25153350830078, 243.765625, 172.46600341796875, -46.89720153808594, 75.48359680175781, 20.261398315429688, 106.47311401367188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000151.npy"}
|
|
{"epoch": 0.22173274596182085, "step": 152, "batch_size": 64, "mean": 76.53138732910156, "std": 92.72167205810547, "min": -87.21243286132812, "p10": -39.08784866333007, "median": 59.96224403381348, "p90": 206.79201507568362, "max": 307.6248779296875, "pos_frac": 0.796875, "sample": [30.18341064453125, 55.902992248535156, 158.05862426757812, 172.66346740722656, 73.48338317871094, 199.34420776367188, 59.7000617980957, 137.49359130859375, 307.6248779296875, 33.96068572998047, 152.2926025390625, 125.4827880859375, 34.92204666137695, -63.37702178955078, -2.9425735473632812, 229.2847900390625, 139.07455444335938, 48.95148468017578, 66.97053527832031, 62.55708312988281, 56.84657287597656, 51.96227264404297, -44.31182098388672, 72.51762390136719, 2.3003273010253906, 306.97967529296875, 125.5694351196289, 216.3571014404297, 40.68992614746094, 22.32994842529297, 62.51264953613281, -53.6934814453125, 21.91103744506836, -3.85498046875, 2.7438182830810547, 60.22442626953125, 20.35327911376953, 194.38229370117188, 35.294677734375, 46.33998107910156, 279.8299865722656, -22.813575744628906, 55.04437255859375, 198.85104370117188, 66.86293029785156, -30.415908813476562, 238.08790588378906, 34.483604431152344, -73.41730499267578, 104.26953125, 6.787393569946289, 138.49766540527344, 89.3670654296875, 66.11876678466797, 209.9839324951172, -42.73760986328125, 159.43862915039062, -87.21243286132812, -6.2095947265625, 113.91355895996094, -42.82586669921875, -30.571739196777344, 131.962158203125, 81.62781524658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000152.npy"}
|
|
{"epoch": 0.22320117474302498, "step": 153, "batch_size": 64, "mean": 65.17752075195312, "std": 99.81834411621094, "min": -174.7130126953125, "p10": -35.07184906005859, "median": 54.3188591003418, "p90": 194.54412536621098, "max": 412.63946533203125, "pos_frac": 0.75, "sample": [82.91603088378906, -2.328338623046875, 9.03448486328125, -5.688636779785156, 20.217693328857422, -7.3118896484375, 28.58612060546875, 153.863525390625, -40.99230194091797, 184.73793029785156, 93.589599609375, 8.321296691894531, -38.57137680053711, 81.72300720214844, 236.94874572753906, -67.86449432373047, 75.57992553710938, 16.5783634185791, 55.06867218017578, 102.23857879638672, 53.56904602050781, 412.63946533203125, 38.29285430908203, 63.843353271484375, 78.010498046875, 180.62600708007812, -24.34490203857422, -41.75446319580078, 198.7467803955078, 202.1715087890625, 137.4299774169922, 36.96955108642578, 235.90634155273438, 6.554256439208984, 1.701425552368164, 1.6465682983398438, 162.35855102539062, 94.09033966064453, -33.54392623901367, -35.7266731262207, 122.63902282714844, 13.351821899414062, 8.977645874023438, 264.1441345214844, 23.106918334960938, -15.703842163085938, -58.191307067871094, 63.54307556152344, 8.304954528808594, -174.7130126953125, 87.19566345214844, 350.48095703125, 75.39019775390625, -30.772228240966797, 62.34038543701172, -3.9802989959716797, 93.07838439941406, 42.47138214111328, 129.87432861328125, 133.32278442382812, 73.30746459960938, -20.716758728027344, 61.356163024902344, 106.71983337402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000153.npy"}
|
|
{"epoch": 0.22466960352422907, "step": 154, "batch_size": 64, "mean": 62.42070007324219, "std": 103.75210571289062, "min": -200.0953369140625, "p10": -50.61149024963379, "median": 53.1880989074707, "p90": 188.33187408447264, "max": 294.7155456542969, "pos_frac": 0.65625, "sample": [-10.657060623168945, 52.31953430175781, -200.0953369140625, 23.689172744750977, 294.7155456542969, 54.056663513183594, -11.744524002075195, 144.9627685546875, 171.81228637695312, -50.66941452026367, 87.66788482666016, 227.2865753173828, -85.3992919921875, 123.09710693359375, 120.29815673828125, 59.56705093383789, 2.245086669921875, 19.884628295898438, 34.24199295043945, -4.3458404541015625, 106.63851928710938, 60.66659164428711, -3.2045249938964844, 59.317020416259766, -27.68817138671875, 187.98995971679688, -49.701358795166016, 202.20516967773438, 148.39370727539062, -86.75227355957031, -33.029396057128906, 177.11373901367188, -74.16912078857422, 168.52798461914062, -50.47633361816406, 195.30252075195312, 292.8734436035156, 170.25643920898438, -2.6581573486328125, 175.70860290527344, 188.47840881347656, 117.17926025390625, 15.877269744873047, 79.81922912597656, 183.22251892089844, -11.727058410644531, -17.204727172851562, -5.300676345825195, -92.22452545166016, 113.76979064941406, 157.5595245361328, -20.563156127929688, 3.015249252319336, 29.10430908203125, 7.422576904296875, -79.82206726074219, 108.34086608886719, 135.84164428710938, 118.28990936279297, -48.96356201171875, -9.144973754882812, 2.8463497161865234, 77.68318176269531, 271.17822265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000154.npy"}
|
|
{"epoch": 0.2261380323054332, "step": 155, "batch_size": 64, "mean": 63.25199508666992, "std": 93.8571548461914, "min": -211.89219665527344, "p10": -22.805210494995112, "median": 61.51967239379883, "p90": 168.02356719970712, "max": 346.95538330078125, "pos_frac": 0.75, "sample": [-6.3214569091796875, 84.43929290771484, 54.201026916503906, -4.25579833984375, 90.06901550292969, 12.088546752929688, 82.64229583740234, -28.249954223632812, 74.8118667602539, 22.573997497558594, -5.9971771240234375, 61.90570831298828, 100.73892974853516, -79.13174438476562, 104.9903335571289, 74.28694915771484, -24.94302749633789, 46.355552673339844, 61.133636474609375, 0.240692138671875, 72.57582092285156, 66.0464096069336, 42.03056335449219, 346.95538330078125, 190.15206909179688, 8.326845169067383, 129.82843017578125, 105.33240509033203, 10.107635498046875, -117.50471496582031, 21.90665626525879, -2.1355438232421875, 22.386627197265625, 91.68878173828125, 188.93560791015625, -211.89219665527344, 276.34393310546875, 44.24993133544922, 131.32052612304688, -17.816970825195312, 3.1816368103027344, 92.226806640625, 146.07371520996094, 43.118377685546875, 286.11968994140625, 95.70310974121094, 278.9224853515625, 147.58470153808594, -73.09494018554688, 96.48959350585938, -5.3052215576171875, 176.7830810546875, -11.835132598876953, 112.09850311279297, 86.9486083984375, -48.61376190185547, 58.05016326904297, 129.87063598632812, 122.15160369873047, 10.541248321533203, 131.47642517089844, -3.5379199981689453, -17.32541847229004, 70.08287811279297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000155.npy"}
|
|
{"epoch": 0.2276064610866373, "step": 156, "batch_size": 64, "mean": 36.69999313354492, "std": 106.28523254394531, "min": -275.2389221191406, "p10": -92.2230743408203, "median": 24.820392608642578, "p90": 135.37381286621095, "max": 286.06109619140625, "pos_frac": 0.734375, "sample": [130.53265380859375, 57.17736053466797, 286.06109619140625, 251.43702697753906, 171.02149963378906, 110.24981689453125, -199.18441772460938, 3.1621551513671875, -82.54763793945312, 18.619956970214844, -201.64271545410156, 131.492431640625, 95.35137176513672, -23.01134490966797, -62.574771881103516, -131.80276489257812, 42.37952423095703, 100.75044250488281, 249.99461364746094, 73.88664245605469, 19.466598510742188, 118.77716064453125, 49.115386962890625, 21.50680160522461, 3.5506839752197266, -7.062904357910156, 15.44343376159668, 30.09180450439453, 251.57275390625, -12.669326782226562, 241.36688232421875, 137.03726196289062, 118.57501220703125, 66.86109924316406, 6.898612976074219, 5.622720718383789, 70.70780181884766, -122.18963623046875, -275.2389221191406, -75.8515853881836, 78.68851470947266, -51.251869201660156, 4.1170501708984375, 126.24626159667969, 13.422866821289062, -96.36968994140625, 82.79590606689453, 119.33895111083984, 76.32026672363281, 8.838602066040039, -3.087156295776367, 76.72518157958984, 59.19274139404297, 54.355377197265625, -107.40575408935547, -44.24641418457031, 10.443283081054688, 23.867965698242188, 7.934295654296875, -1.3104171752929688, 25.77281951904297, 74.12361907958984, 8.557506561279297, 116.79510498046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000156.npy"}
|
|
{"epoch": 0.2290748898678414, "step": 157, "batch_size": 64, "mean": 79.97874450683594, "std": 103.37088775634766, "min": -296.0206298828125, "p10": -27.427484893798827, "median": 71.3656120300293, "p90": 190.09415283203128, "max": 375.78466796875, "pos_frac": 0.828125, "sample": [159.42916870117188, 167.57009887695312, 85.4817123413086, 184.7319793701172, 132.34979248046875, 8.196578979492188, 75.51033782958984, 145.1187744140625, 4.657432556152344, 146.94699096679688, 56.57301712036133, 205.619140625, 50.000030517578125, 83.48365783691406, 217.00543212890625, 192.39222717285156, 147.98797607421875, 103.38780212402344, 48.86598205566406, 159.71792602539062, 15.911354064941406, 34.35302734375, 67.22088623046875, -27.94046401977539, 112.24864196777344, 243.9077606201172, -3.503997802734375, 32.39069747924805, -3.55877685546875, 108.96824645996094, -296.0206298828125, -39.821327209472656, 26.416427612304688, 48.5654296875, 128.9255828857422, 79.97464752197266, 247.42428588867188, 127.295166015625, 107.02464294433594, -31.91370964050293, 26.316905975341797, 276.6546325683594, 33.57627868652344, 58.09965515136719, 8.110210418701172, 178.42462158203125, 106.54383850097656, -26.230533599853516, 172.656005859375, 120.60749816894531, 184.6007080078125, 39.983795166015625, -56.3244743347168, 21.455322265625, 183.03372192382812, 150.54591369628906, 375.78466796875, 54.700828552246094, 43.95994567871094, -140.85858154296875, 18.765472412109375, -59.62367248535156, 12.715385437011719, -17.752838134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000157.npy"}
|
|
{"epoch": 0.2305433186490455, "step": 158, "batch_size": 64, "mean": 85.15373229980469, "std": 129.79788208007812, "min": -168.0992431640625, "p10": -43.85169105529784, "median": 51.01091766357422, "p90": 272.2266433715821, "max": 477.7098693847656, "pos_frac": 0.71875, "sample": [8.638843536376953, 11.526878356933594, 42.25885009765625, 51.57013702392578, 47.95274353027344, -30.194473266601562, 33.66624069213867, 85.08145904541016, 36.97438049316406, -52.06387710571289, 213.953125, -112.8878173828125, 248.4960174560547, 82.21287536621094, -12.377487182617188, -53.250335693359375, -15.474084854125977, 108.90559387207031, -12.726903915405273, 13.848320007324219, 227.1854248046875, -35.98200225830078, 282.39691162109375, -168.0992431640625, 299.63372802734375, 72.99561309814453, 134.92495727539062, 20.605932235717773, 36.15605163574219, 11.522891998291016, 79.88143920898438, 83.27831268310547, -19.291053771972656, 151.24386596679688, -51.9932746887207, -4.8471527099609375, 336.81927490234375, 103.5009994506836, -117.95230865478516, 324.111572265625, 407.58843994140625, -2.3161773681640625, 62.477684020996094, 477.7098693847656, 23.746917724609375, 10.182594299316406, -47.22441482543945, 121.13186645507812, 229.66000366210938, 108.08487701416016, 372.79791259765625, 166.7162628173828, 100.06196594238281, 167.6868133544922, -16.462682723999023, 173.79867553710938, 88.71034240722656, -8.745895385742188, 26.184425354003906, 95.06383514404297, 246.39944458007812, 50.451698303222656, 155.0250244140625, -21.093233108520508], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000158.npy"}
|
|
{"epoch": 0.23201174743024963, "step": 159, "batch_size": 64, "mean": 58.1856689453125, "std": 103.80752563476562, "min": -202.02503967285156, "p10": -57.62577590942382, "median": 58.278175354003906, "p90": 190.72889709472656, "max": 314.29376220703125, "pos_frac": 0.75, "sample": [8.844818115234375, 41.65857696533203, 131.8000030517578, -46.63865661621094, 242.86962890625, 127.48934936523438, -202.02503967285156, -37.32670974731445, 125.81378173828125, 156.44482421875, 91.17008972167969, -59.113304138183594, 15.479082107543945, -21.40692138671875, 85.21072387695312, -72.4663314819336, -117.05593872070312, 51.50143051147461, 187.9818115234375, 138.13116455078125, 314.29376220703125, 7.004947662353516, -45.7569580078125, 18.777320861816406, 29.832077026367188, -71.60279846191406, 19.751876831054688, -54.154876708984375, 83.92794799804688, 53.642791748046875, -19.439701080322266, 191.90621948242188, 85.43880462646484, 12.40945816040039, 113.10001373291016, -22.04730796813965, 57.98396301269531, -97.36622619628906, -2.5820388793945312, 224.622802734375, 142.191650390625, 220.13482666015625, 61.204734802246094, 71.04826354980469, 0.6610260009765625, 275.3395080566406, 13.047208786010742, 1.1657085418701172, 72.15290069580078, 58.5723876953125, 131.9595947265625, 25.23009490966797, -52.278648376464844, -162.71978759765625, 69.69417572021484, 95.99354553222656, 95.87108612060547, 95.90809631347656, 20.85001564025879, 299.6709899902344, 182.2584228515625, 63.528900146484375, 69.92868041992188, 124.36479187011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000159.npy"}
|
|
{"epoch": 0.23348017621145375, "step": 160, "batch_size": 64, "mean": 78.42352294921875, "std": 127.36312866210938, "min": -142.99940490722656, "p10": -68.38926162719726, "median": 63.46631622314453, "p90": 238.48291931152343, "max": 457.6695556640625, "pos_frac": 0.6875, "sample": [100.48798370361328, -11.044706344604492, 184.60977172851562, 155.3506317138672, -11.384807586669922, -70.03118896484375, 185.14126586914062, 178.53416442871094, 170.90847778320312, 12.072542190551758, -78.94766998291016, -5.8256988525390625, 137.05271911621094, 238.67562866210938, 114.3878402709961, -70.15892028808594, 36.66643142700195, -26.091812133789062, -140.2211456298828, 226.5265655517578, 274.6365966796875, -115.79768371582031, 238.03326416015625, -64.02307891845703, 140.1597137451172, 47.237396240234375, 6.351448059082031, -13.445877075195312, 457.6695556640625, 60.359901428222656, 14.35002326965332, 66.5727310180664, -34.36767578125, 379.78155517578125, -2.8368301391601562, 67.21745300292969, 98.91631317138672, 159.2276611328125, 369.42340087890625, 301.2987060546875, 33.33599090576172, 209.63528442382812, -142.99940490722656, 205.21951293945312, 93.93260955810547, 16.508941650390625, -28.588823318481445, 18.659093856811523, 111.0027084350586, 112.23982238769531, -35.682212829589844, 32.23247528076172, 112.07630920410156, -64.55809783935547, 88.29989624023438, 78.97230529785156, -9.100252151489258, 0.6564083099365234, -95.72502899169922, 153.74752807617188, 30.621009826660156, 252.47064208984375, -43.81884765625, 112.49490356445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000160.npy"}
|
|
{"epoch": 0.23494860499265785, "step": 161, "batch_size": 64, "mean": 68.96272277832031, "std": 102.6685562133789, "min": -128.23831176757812, "p10": -47.91453552246094, "median": 48.46743392944336, "p90": 229.77516632080088, "max": 312.36700439453125, "pos_frac": 0.75, "sample": [76.08839416503906, 194.216796875, -49.13323974609375, 138.4630889892578, 296.2932434082031, -128.23831176757812, -49.979156494140625, -67.38082885742188, 94.43302917480469, -42.417747497558594, 46.4990234375, 98.92219543457031, 78.21806335449219, -69.74580383300781, -4.182781219482422, 119.07371520996094, 312.36700439453125, 79.21368408203125, 136.388916015625, 265.87762451171875, 15.676246643066406, 54.560848236083984, -4.82708740234375, -33.64506530761719, 31.710235595703125, -9.355976104736328, 122.3812026977539, 37.16522979736328, 279.1807556152344, 297.3194580078125, 117.35304260253906, -33.24000549316406, 2.197490692138672, -26.918224334716797, 238.89981079101562, 116.4853744506836, 32.045257568359375, -45.070892333984375, 17.781402587890625, 82.81593322753906, -13.034011840820312, 23.777198791503906, 9.316986083984375, 33.48310852050781, 17.754981994628906, 29.37933349609375, 205.171875, 70.66747283935547, 16.343162536621094, 140.5223388671875, 208.4843292236328, -103.53285217285156, 66.412353515625, 168.39990234375, 34.430809020996094, 39.518096923828125, 33.49393844604492, 122.12168884277344, 89.83714294433594, -85.76470184326172, 50.43584442138672, 255.59872436523438, 113.12522888183594, 70.1793441772461], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000161.npy"}
|
|
{"epoch": 0.23641703377386197, "step": 162, "batch_size": 64, "mean": 68.85668182373047, "std": 110.2022933959961, "min": -174.91551208496094, "p10": -40.50316467285156, "median": 59.39109802246094, "p90": 199.9821533203125, "max": 448.7046813964844, "pos_frac": 0.75, "sample": [1.0594253540039062, 52.79348373413086, 105.08004760742188, 246.68263244628906, 56.33625793457031, 220.32846069335938, 29.055784225463867, 135.8911590576172, 43.069068908691406, -9.408666610717773, -21.133148193359375, 195.64825439453125, 67.92282104492188, -105.75849151611328, -36.447723388671875, -18.502689361572266, 161.42825317382812, 165.10171508789062, -6.0762481689453125, 122.54179382324219, 74.87828826904297, 27.844703674316406, 49.42161560058594, 8.333908081054688, 20.8566837310791, 201.83953857421875, 131.63558959960938, -33.39387512207031, -174.91551208496094, 144.67840576171875, 100.62625885009766, -111.60150146484375, -15.571531295776367, 17.214277267456055, 148.36019897460938, 66.60610961914062, 226.58184814453125, 448.7046813964844, 63.71790313720703, 117.6795883178711, 85.41779327392578, 90.691162109375, 149.20790100097656, 158.7545623779297, 143.14480590820312, 5.5687103271484375, -42.2412109375, -0.9060745239257812, 96.04835510253906, 41.817222595214844, -85.60777282714844, 162.2737274169922, 3.2722625732421875, -91.99735260009766, 388.55584716796875, 24.949951171875, -113.91202545166016, 32.02342224121094, 75.78467559814453, 86.01464080810547, 62.44593811035156, -34.253875732421875, 39.753997802734375, 210.91162109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000162.npy"}
|
|
{"epoch": 0.23788546255506607, "step": 163, "batch_size": 64, "mean": 66.64288330078125, "std": 107.73143005371094, "min": -156.284423828125, "p10": -50.26417083740234, "median": 59.91253662109375, "p90": 249.68630676269535, "max": 350.09222412109375, "pos_frac": 0.671875, "sample": [-6.938240051269531, -9.244606018066406, 262.64813232421875, 350.09222412109375, -26.12059783935547, -119.74946594238281, 92.79283905029297, -9.20953369140625, -9.35662841796875, 75.7454833984375, 129.85519409179688, -33.84355926513672, 105.5017318725586, 314.396240234375, 103.7608413696289, 41.628692626953125, 98.35582733154297, -16.063827514648438, 16.833763122558594, -60.733680725097656, 53.657867431640625, 150.0352783203125, 106.23330688476562, -27.92237091064453, 96.48268127441406, -0.5515327453613281, 122.06086730957031, -156.284423828125, -67.82723236083984, 104.39940643310547, 260.40887451171875, 12.648456573486328, 134.6378173828125, 11.96351432800293, 68.14360046386719, -10.296878814697266, -52.30543518066406, 290.8199462890625, 124.08655548095703, 62.9788818359375, -72.28265380859375, 38.787437438964844, -45.501220703125, 240.7498779296875, 42.727874755859375, 97.62823486328125, -29.39629364013672, 284.939453125, 144.7406463623047, 29.314905166625977, 58.560943603515625, 107.38177490234375, 61.264129638671875, 180.01956176757812, 78.15003204345703, -24.279441833496094, 82.01719665527344, 10.179336547851562, -92.01004791259766, 0.44612693786621094, 253.51620483398438, 85.36276245117188, 159.38671875, -10.279367446899414], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000163.npy"}
|
|
{"epoch": 0.2393538913362702, "step": 164, "batch_size": 64, "mean": 82.15338134765625, "std": 103.5638198852539, "min": -147.525634765625, "p10": -20.6383207321167, "median": 69.3184814453125, "p90": 207.42846984863286, "max": 417.7083740234375, "pos_frac": 0.84375, "sample": [-140.80445861816406, 166.08816528320312, 165.7470703125, 164.18167114257812, 219.31939697265625, 51.91791534423828, 78.45735168457031, 122.6304931640625, 70.22732543945312, 71.32450103759766, 244.14132690429688, 47.607383728027344, 19.111406326293945, 51.07591247558594, -67.70011901855469, 131.6824951171875, 63.30223846435547, 65.45531463623047, -20.490137100219727, 68.40963745117188, 1.7608623504638672, 126.54764556884766, 73.81710815429688, 196.9732666015625, 31.62431526184082, 152.18478393554688, -22.427387237548828, -87.76502227783203, 146.31173706054688, 15.99915885925293, 44.66477966308594, -19.529037475585938, 152.25146484375, 175.14779663085938, 134.5771484375, 113.16508483886719, 35.210853576660156, 99.2343978881836, 13.891853332519531, 164.52098083496094, 38.732177734375, 86.40611267089844, 68.29943084716797, 30.99786376953125, 44.10804748535156, 2.0480194091796875, 71.04216003417969, 32.102027893066406, -147.525634765625, 136.83438110351562, 75.25143432617188, 49.56956481933594, 335.2001647949219, 2.178497314453125, 344.07391357421875, 229.439208984375, 1.500396728515625, 211.90927124023438, 82.75811767578125, -52.426971435546875, -20.701828002929688, 102.59405517578125, 417.7083740234375, -4.1291351318359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000164.npy"}
|
|
{"epoch": 0.24082232011747431, "step": 165, "batch_size": 64, "mean": 52.59346008300781, "std": 99.6047134399414, "min": -150.33958435058594, "p10": -81.61388778686523, "median": 39.368804931640625, "p90": 199.76520690917977, "max": 277.62506103515625, "pos_frac": 0.703125, "sample": [88.9291763305664, 84.9374008178711, -86.54830932617188, 18.700416564941406, -99.42501831054688, -3.0272293090820312, 75.69561004638672, 114.07705688476562, -18.276947021484375, -82.00702667236328, 125.73062896728516, -148.82049560546875, -81.35334777832031, 36.93244934082031, 37.61009216308594, -95.90752410888672, -10.689411163330078, -48.19825744628906, 152.693603515625, 150.96368408203125, 82.25227355957031, 23.668601989746094, 15.789260864257812, 277.62506103515625, -150.33958435058594, 35.778656005859375, -19.0174617767334, -45.889747619628906, -5.14459228515625, 141.0664520263672, -81.72554779052734, 19.436513900756836, 214.3419189453125, 182.69940185546875, 47.40374755859375, -29.411659240722656, 79.712646484375, 60.68656921386719, 77.2723617553711, 4.750688552856445, 41.12751770019531, 58.398597717285156, 150.72767639160156, 76.21944427490234, 222.24810791015625, -21.282135009765625, 97.61865234375, 60.24977111816406, 6.230583190917969, -68.8607177734375, 30.005279541015625, 219.21328735351562, 30.367431640625, -38.349876403808594, 59.09577560424805, 263.05169677734375, 26.026222229003906, 263.12530517578125, 206.93780517578125, 124.14274597167969, 114.01066589355469, 12.309486389160156, 107.36691284179688, 183.02914428710938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000165.npy"}
|
|
{"epoch": 0.2422907488986784, "step": 166, "batch_size": 64, "mean": 98.27557373046875, "std": 139.42352294921875, "min": -92.2435302734375, "p10": -34.65679473876952, "median": 64.8866138458252, "p90": 275.8610504150392, "max": 566.3701171875, "pos_frac": 0.8125, "sample": [448.509765625, 311.58251953125, 29.313922882080078, 490.4876708984375, -38.66413116455078, 5.019512176513672, 19.650577545166016, 69.48880004882812, 172.27145385742188, 44.436370849609375, 123.43666076660156, -9.991291046142578, 10.95170783996582, 159.50393676757812, -62.47607421875, 76.9910888671875, 158.88330078125, 17.801498413085938, 2.4459571838378906, 189.48931884765625, 3.048868179321289, 566.3701171875, 39.94882583618164, -49.072994232177734, -52.143089294433594, -52.23330307006836, 37.149559020996094, 141.75347900390625, -92.2435302734375, 243.82363891601562, 289.59136962890625, 468.9508361816406, 107.71634674072266, 67.3253173828125, 27.6390380859375, 361.2008056640625, 12.529975891113281, 199.60198974609375, 26.054656982421875, 150.599365234375, -22.416873931884766, 86.11788940429688, 18.998031616210938, 11.233345031738281, 177.35403442382812, 129.84254455566406, 214.03172302246094, -9.731342315673828, 76.34229278564453, 170.5189208984375, 31.65426254272461, 83.52533721923828, -61.85377502441406, 62.44791030883789, -5.909034729003906, -25.30634307861328, 115.65264129638672, 2.308380126953125, 114.6673583984375, 6.289703369140625, 89.56895446777344, 17.289838790893555, 203.62411499023438, 86.64289093017578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000166.npy"}
|
|
{"epoch": 0.24375917767988253, "step": 167, "batch_size": 64, "mean": 103.19129943847656, "std": 115.97958374023438, "min": -140.95623779296875, "p10": -18.148437499999996, "median": 86.8951416015625, "p90": 249.67114715576182, "max": 436.15704345703125, "pos_frac": 0.828125, "sample": [103.93629455566406, 64.26771545410156, 149.34719848632812, 217.19313049316406, 111.80438232421875, 124.2625732421875, 91.29949951171875, 67.64863586425781, 96.38710021972656, 47.69345474243164, 436.15704345703125, 189.16534423828125, 70.89759826660156, -15.762588500976562, 27.187225341796875, 60.99480056762695, 78.53099822998047, -56.834373474121094, 131.5762481689453, 183.46160888671875, 23.037004470825195, 41.11162567138672, 78.95323181152344, 58.268394470214844, 105.28288269042969, -140.95623779296875, 276.1710205078125, 206.226318359375, -23.18499755859375, 170.91107177734375, -12.557548522949219, 105.22149658203125, 76.19102478027344, 56.46105194091797, 368.01611328125, 157.7550048828125, 260.5696716308594, 82.49078369140625, 202.53480529785156, -125.69293212890625, 224.2412567138672, 262.3982849121094, 334.7462463378906, 147.0511474609375, 403.76690673828125, -15.381538391113281, 175.26553344726562, 143.26080322265625, 220.9287109375, 175.60519409179688, 102.55094909667969, 11.355056762695312, -0.9027156829833984, 29.838050842285156, 56.13600158691406, 131.49203491210938, 103.36540985107422, 56.28966522216797, 21.363571166992188, -97.97689056396484, 22.675479888916016, 14.74485969543457, -45.42295837402344, -19.170944213867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000167.npy"}
|
|
{"epoch": 0.24522760646108663, "step": 168, "batch_size": 64, "mean": 75.70437622070312, "std": 103.38095092773438, "min": -86.1849136352539, "p10": -50.684083175659175, "median": 53.44643211364746, "p90": 200.01539306640626, "max": 399.284423828125, "pos_frac": 0.796875, "sample": [16.28314208984375, 113.39205169677734, 26.362281799316406, 21.49120330810547, 6.375711441040039, 32.025665283203125, 133.66363525390625, -46.32265090942383, 20.219318389892578, 16.10253143310547, -81.86676025390625, 186.9267120361328, 399.284423828125, 90.76441955566406, -54.37471008300781, 196.11203002929688, 177.35040283203125, 242.6710662841797, 201.68826293945312, 47.184226989746094, 214.48190307617188, 270.7938232421875, 175.7098846435547, 165.60910034179688, -44.40125274658203, -52.55326843261719, -15.9111328125, 126.03857421875, 149.35728454589844, 132.37628173828125, 88.20953369140625, 33.00956726074219, -64.18276977539062, 97.42864990234375, 37.097808837890625, 119.0643310546875, -67.93504333496094, -10.520755767822266, -30.845218658447266, 27.23614501953125, 29.00261878967285, 88.43501281738281, 60.89520263671875, 94.50436401367188, 113.08198547363281, 66.72035217285156, 28.275182723999023, -61.41992950439453, 276.0365295410156, 56.613990783691406, 158.00074768066406, 22.135923385620117, 24.423730850219727, 27.650238037109375, 127.24488067626953, 54.945098876953125, 26.12666893005371, -86.1849136352539, 51.9477653503418, -32.376220703125, 374.1309509277344, 19.519813537597656, 94.76846313476562, 135.205322265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000168.npy"}
|
|
{"epoch": 0.24669603524229075, "step": 169, "batch_size": 64, "mean": 66.46955108642578, "std": 129.17422485351562, "min": -271.70806884765625, "p10": -66.80077896118162, "median": 43.99698448181152, "p90": 220.34434356689457, "max": 560.31982421875, "pos_frac": 0.75, "sample": [76.2109146118164, 87.16573333740234, 3.771617889404297, 19.073158264160156, 211.6922149658203, 111.17176055908203, 19.210006713867188, 157.0628204345703, 87.82257843017578, 34.18928909301758, -1.1896858215332031, 45.81877136230469, -271.70806884765625, 2.2320404052734375, 53.682308197021484, -24.439620971679688, 26.806705474853516, -83.89784240722656, 85.3841552734375, 167.71832275390625, 144.53106689453125, -52.43125915527344, 64.85565185546875, -81.86824035644531, 207.78500366210938, 224.05239868164062, 64.14849853515625, 3.06231689453125, -14.634033203125, 29.998268127441406, 207.7987060546875, -3.2576980590820312, 17.717309951782227, 67.1411361694336, 88.73193359375, 128.8036651611328, 253.37356567382812, 24.447057723999023, 67.98129272460938, 231.8995819091797, 6.09520149230957, 388.96673583984375, -124.7173080444336, 92.06207275390625, 4.209846496582031, 52.111083984375, 31.249435424804688, -72.95914459228516, 336.0099792480469, -14.600784301757812, 560.31982421875, -5.020233154296875, 93.2453384399414, 14.629920959472656, 284.9467468261719, -9.84918212890625, 81.87571716308594, 18.732589721679688, 42.17519760131836, -27.61583709716797, -121.86566162109375, 92.45796203613281, 195.7908172607422, -146.112548828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000169.npy"}
|
|
{"epoch": 0.24816446402349487, "step": 170, "batch_size": 64, "mean": 78.5929946899414, "std": 105.24567413330078, "min": -227.08193969726562, "p10": -31.82768859863281, "median": 66.83643341064453, "p90": 212.44104309082036, "max": 326.0834655761719, "pos_frac": 0.828125, "sample": [187.47747802734375, -132.84783935546875, 135.20028686523438, 83.15020751953125, 200.804931640625, 80.00796508789062, 123.59465026855469, 17.243728637695312, 143.6584930419922, 2.5821990966796875, 90.92594146728516, -1.2941627502441406, 72.601318359375, -35.54524230957031, 9.702592849731445, -50.12763977050781, 111.75501251220703, 236.1373291015625, 16.52142333984375, 294.7087097167969, 4.720300674438477, -18.50482749938965, 165.82321166992188, 47.9402961730957, 2.0860214233398438, 21.646507263183594, -227.08193969726562, -30.184532165527344, 75.22698974609375, 195.55718994140625, 76.87623596191406, 124.65945434570312, 53.6235466003418, 78.335205078125, 175.026123046875, -52.375274658203125, 307.9747314453125, 6.2653045654296875, 8.204526901245117, 326.0834655761719, 72.60693359375, -90.72816467285156, 57.06222915649414, 120.8131103515625, 197.2724609375, -32.531898498535156, 27.848360061645508, 5.531290054321289, 37.066139221191406, 61.07154846191406, 90.24542999267578, 48.787376403808594, 235.22500610351562, 41.76777267456055, 133.93246459960938, 58.444725036621094, 187.75599670410156, 217.42794799804688, -17.150409698486328, 194.76974487304688, 234.5889892578125, 12.76945686340332, 189.503662109375, 17.711322784423828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000170.npy"}
|
|
{"epoch": 0.24963289280469897, "step": 171, "batch_size": 64, "mean": 63.276702880859375, "std": 130.76609802246094, "min": -255.5075225830078, "p10": -90.3816551208496, "median": 58.221946716308594, "p90": 209.469775390625, "max": 574.9740600585938, "pos_frac": 0.703125, "sample": [-82.31124877929688, 217.5814971923828, 23.78008270263672, 91.110107421875, -97.6507797241211, -49.897247314453125, 8.848350524902344, 108.13836669921875, -77.58343505859375, 135.14215087890625, 11.847190856933594, 150.67990112304688, 89.52603149414062, 108.94798278808594, 49.566429138183594, -56.36253356933594, 90.91586303710938, -96.16792297363281, 109.09703063964844, -8.212394714355469, 201.99594116210938, 84.40469360351562, 51.052032470703125, 85.65522003173828, 327.21453857421875, 69.53575134277344, -93.84040069580078, 574.9740600585938, -95.35747528076172, 49.475372314453125, 71.63545227050781, -255.5075225830078, 419.456298828125, 132.81124877929688, 90.10713195800781, -117.8187255859375, 209.22714233398438, 101.30299377441406, 9.464035034179688, 290.25048828125, 7.22601318359375, 70.73638153076172, 209.57376098632812, -22.369489669799805, -94.403076171875, 11.26068115234375, 65.39186096191406, 82.93907165527344, -57.93879318237305, 102.91708374023438, -25.144920349121094, -55.301605224609375, 135.6165771484375, 40.685585021972656, 78.14959716796875, -10.01957893371582, 33.976959228515625, 157.21719360351562, -18.79358673095703, -6.372711181640625, 88.3777847290039, 16.656644821166992, 257.57977294921875, 48.71437454223633], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000171.npy"}
|
|
{"epoch": 0.2511013215859031, "step": 172, "batch_size": 64, "mean": 66.58623504638672, "std": 110.25837707519531, "min": -188.6351318359375, "p10": -69.86798782348632, "median": 74.27996444702148, "p90": 193.79675292968753, "max": 451.4119873046875, "pos_frac": 0.6875, "sample": [-34.3768310546875, 103.73171997070312, -111.34133911132812, 87.32744598388672, 35.21101379394531, -45.54723358154297, 145.082275390625, 99.75471496582031, 74.40091705322266, -74.48043060302734, -121.85789489746094, 55.42795944213867, 50.90247344970703, -78.72111511230469, 147.61886596679688, 86.66885375976562, 16.710674285888672, 68.95610046386719, -51.59780502319336, 68.13321685791016, -35.62129211425781, 92.11679077148438, 184.015869140625, 140.83863830566406, 20.48365020751953, 74.15901184082031, 228.53305053710938, 195.646484375, -76.96612548828125, 237.1708984375, -25.926231384277344, 109.24335479736328, 101.67249298095703, 91.95550537109375, 117.88018798828125, 301.2025451660156, 451.4119873046875, 46.701171875, 120.65099334716797, 30.129650115966797, -20.179771423339844, -44.534149169921875, 80.4306640625, 80.64425659179688, 189.480712890625, -24.976165771484375, -11.415407180786133, 27.537900924682617, -29.905319213867188, 170.15231323242188, 155.58009338378906, -4.5281829833984375, -59.105621337890625, 95.70353698730469, 105.0307846069336, -188.6351318359375, 71.29597473144531, 102.831787109375, 227.13690185546875, 94.58255004882812, -15.656883239746094, -81.08091735839844, 237.2763671875, 176.55081176757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000172.npy"}
|
|
{"epoch": 0.2525697503671072, "step": 173, "batch_size": 64, "mean": 67.0791015625, "std": 95.90992736816406, "min": -185.72247314453125, "p10": -24.512316513061517, "median": 49.90376091003418, "p90": 209.48938903808596, "max": 344.5565185546875, "pos_frac": 0.765625, "sample": [47.79362487792969, 80.2350845336914, 34.554046630859375, -5.232872009277344, -2.1371116638183594, 48.77415466308594, 1.6487922668457031, 136.41571044921875, 118.29762268066406, 9.443166732788086, -14.025115966796875, -41.04693603515625, 16.660463333129883, -2.4698867797851562, 79.96475219726562, 267.44366455078125, 72.21331024169922, 55.90407180786133, 69.67339324951172, 202.35269165039062, 115.31294250488281, 87.05532836914062, 84.96471405029297, -57.302268981933594, 50.660240173339844, 26.36029815673828, 344.5565185546875, 29.023502349853516, 159.41049194335938, 212.5479736328125, 65.15543365478516, 17.4885311126709, -58.0169677734375, 112.4795150756836, 103.80047607421875, -16.803512573242188, 118.17215728759766, 7.24884033203125, -2.7701034545898438, 231.0130615234375, 63.06674575805664, 187.82244873046875, -27.816089630126953, 36.457923889160156, -7.528450012207031, 13.831829071044922, 219.240966796875, 112.26727294921875, -41.728355407714844, -2.27423095703125, 93.5533676147461, 49.147281646728516, 131.88011169433594, -67.50186157226562, -185.72247314453125, 335.40045166015625, 36.7421875, 5.430757522583008, 90.05645751953125, 18.95743179321289, 266.8503723144531, 60.89576721191406, 51.73677444458008, 45.475921630859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000173.npy"}
|
|
{"epoch": 0.2540381791483113, "step": 174, "batch_size": 64, "mean": 79.28968048095703, "std": 118.29193878173828, "min": -157.06565856933594, "p10": -51.246144866943354, "median": 59.71384048461914, "p90": 257.02567291259777, "max": 453.7724304199219, "pos_frac": 0.765625, "sample": [23.644515991210938, -15.945465087890625, -52.84898376464844, -91.7467041015625, -12.613174438476562, 35.032142639160156, -13.152202606201172, 12.334844589233398, 112.48796844482422, 199.5895233154297, 222.3898468017578, -1.5279922485351562, 61.08783721923828, 18.509374618530273, 54.75311279296875, -42.80873107910156, 127.93601989746094, 161.7725067138672, 153.3106231689453, 271.8695983886719, 58.33984375, 33.87589645385742, 61.610862731933594, -15.858514785766602, 325.9761657714844, -85.4498291015625, 134.29794311523438, 299.69329833984375, 28.985977172851562, 92.06880187988281, 9.360090255737305, -13.982067108154297, 64.22344970703125, -80.885498046875, 140.10845947265625, -47.506187438964844, 164.15609741210938, 108.31317901611328, -126.53024291992188, 273.047119140625, 305.44305419921875, 328.5495910644531, 33.01139831542969, 121.99420166015625, 8.878183364868164, -157.06565856933594, 3.36181640625, 134.1031494140625, 122.18727111816406, 142.15017700195312, 80.86653137207031, 48.625946044921875, 24.41558074951172, 138.18145751953125, -97.93958282470703, 187.94236755371094, 149.03643798828125, 93.68624877929688, 46.55591583251953, 13.660533905029297, 453.7724304199219, 122.75117492675781, 88.93905639648438, 33.51258850097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000174.npy"}
|
|
{"epoch": 0.2555066079295154, "step": 175, "batch_size": 64, "mean": 108.45330810546875, "std": 108.2730712890625, "min": -88.28738403320312, "p10": -8.24448833465576, "median": 86.6563720703125, "p90": 242.66899414062502, "max": 428.94464111328125, "pos_frac": 0.828125, "sample": [21.91706657409668, 36.907127380371094, 56.43286895751953, -88.28738403320312, 86.56977844238281, 163.46859741210938, 232.88134765625, 85.45568084716797, 22.94220733642578, 151.3407440185547, 296.307861328125, -72.18975830078125, -10.516876220703125, 311.0262451171875, 105.92010498046875, 31.863250732421875, 218.40093994140625, 223.93540954589844, 231.32382202148438, 163.26028442382812, 178.658935546875, -56.014122009277344, 203.18707275390625, 428.94464111328125, 36.659423828125, 133.69549560546875, 320.74957275390625, 45.30610656738281, 132.84689331054688, 85.1355209350586, -5.355064392089844, 55.21868133544922, 301.4743957519531, 17.45052719116211, 5.298988342285156, 35.690086364746094, -1.6426887512207031, 263.9162292480469, 7.999019622802734, 83.70065307617188, 125.19122314453125, 120.13906860351562, 124.31761932373047, 212.683837890625, -1.3431110382080078, 217.12802124023438, 237.33187866210938, 188.46971130371094, -32.06058120727539, 100.56848907470703, 170.78570556640625, 95.18962860107422, 86.74296569824219, 154.01992797851562, 42.17702102661133, 69.12934875488281, 143.14944458007812, -9.482812881469727, 40.82312774658203, 244.95632934570312, -50.967735290527344, 66.4291000366211, -1.360382080078125, 55.11418151855469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000175.npy"}
|
|
{"epoch": 0.25697503671071953, "step": 176, "batch_size": 64, "mean": 91.49755859375, "std": 121.00588989257812, "min": -63.83177185058594, "p10": -33.48957977294922, "median": 62.287893295288086, "p90": 221.0479476928711, "max": 508.62054443359375, "pos_frac": 0.8125, "sample": [3.238189697265625, 26.375625610351562, 100.08401489257812, 215.4659881591797, 61.138545989990234, 25.037403106689453, 14.715896606445312, 329.1025085449219, 19.302223205566406, -25.386199951171875, -32.3533935546875, 482.4090270996094, 12.858736038208008, -63.83177185058594, 223.44021606445312, 70.18820190429688, 28.335205078125, 211.4492645263672, 177.953369140625, -5.4128570556640625, -19.633865356445312, 73.07108306884766, 144.15296936035156, -48.700401306152344, 227.48190307617188, 1.8192119598388672, 28.43028450012207, 197.73439025878906, 184.8323974609375, 14.874427795410156, 117.40890502929688, -63.5994873046875, 151.44558715820312, -49.665809631347656, 37.98439025878906, 168.88040161132812, -44.20869064331055, 81.02104949951172, 186.78836059570312, 136.04164123535156, 187.56411743164062, 0.32955169677734375, -42.55218505859375, 2.6951637268066406, 144.63751220703125, 135.84646606445312, 129.37689208984375, 66.37631225585938, 134.92904663085938, 294.94720458984375, 36.496795654296875, -33.97651672363281, 18.418228149414062, 15.99067497253418, 138.3938751220703, 63.43724060058594, -15.136051177978516, 18.495609283447266, 135.5623321533203, 3.1354331970214844, 163.27310180664062, 508.62054443359375, 19.97081756591797, 328.742919921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000176.npy"}
|
|
{"epoch": 0.25844346549192365, "step": 177, "batch_size": 64, "mean": 89.67583465576172, "std": 119.57540893554688, "min": -108.71656799316406, "p10": -23.93701953887939, "median": 60.34989547729492, "p90": 241.86781616210942, "max": 586.0198974609375, "pos_frac": 0.734375, "sample": [168.949462890625, 56.214080810546875, 230.15501403808594, 121.67339324951172, 63.311798095703125, 126.837646484375, -108.71656799316406, 93.31196594238281, 202.083251953125, 169.26806640625, 252.2129364013672, 42.20030975341797, 83.37554931640625, 257.7668151855469, 127.12033081054688, 80.30183410644531, 21.84803009033203, 367.7132263183594, 28.052024841308594, 176.44305419921875, 54.293785095214844, 151.9193878173828, -77.63533020019531, 149.71205139160156, -2.903757095336914, 45.188331604003906, 204.364013671875, -21.14093017578125, -6.340545654296875, 4.78759765625, 57.38799285888672, 290.10064697265625, 104.96360778808594, -19.47589111328125, 12.39849853515625, 79.365966796875, -16.771629333496094, 246.88758850097656, -0.41808128356933594, -28.669828414916992, -80.1270751953125, 155.693115234375, -5.279293060302734, 52.311607360839844, -90.2547607421875, 151.62411499023438, 132.33657836914062, 15.993404388427734, -8.983680725097656, -4.5784149169921875, 194.3650665283203, 13.309938430786133, 77.79032897949219, -13.147499084472656, 195.54098510742188, 33.09035110473633, 41.63435363769531, -25.135343551635742, 160.11196899414062, 42.50144958496094, 586.0198974609375, 282.4483642578125, 72.4594955444336, -28.607406616210938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000177.npy"}
|
|
{"epoch": 0.2599118942731278, "step": 178, "batch_size": 64, "mean": 114.01716613769531, "std": 142.20709228515625, "min": -148.04067993164062, "p10": -30.468666839599603, "median": 101.61541748046875, "p90": 319.33523559570324, "max": 559.3980712890625, "pos_frac": 0.796875, "sample": [46.210269927978516, 23.861366271972656, 157.43502807617188, -23.497650146484375, 138.53399658203125, 92.5973129272461, 111.4464340209961, 43.23418426513672, 20.182369232177734, 294.82391357421875, 2.0660324096679688, 112.01165771484375, 279.5423583984375, 112.86376953125, 50.421417236328125, 158.37191772460938, 402.4897155761719, 112.40870666503906, 559.3980712890625, -52.71592712402344, 208.6377410888672, 59.939056396484375, 9.408416748046875, 55.630653381347656, -5.92119026184082, 178.74343872070312, 368.7800598144531, -46.30370330810547, 65.51943969726562, -5.39190673828125, -76.70075988769531, 107.06381225585938, 223.79669189453125, 22.822341918945312, 122.5319595336914, 190.81321716308594, 134.53085327148438, -94.40704345703125, 68.07276916503906, 266.933349609375, 31.830394744873047, 120.73513793945312, -148.04067993164062, 435.6546325683594, -8.432397842407227, 159.81898498535156, 117.37158203125, 140.729248046875, -33.45624542236328, 469.2853698730469, 37.745941162109375, 136.68966674804688, 333.3603820800781, 1.5527935028076172, 8.71282958984375, 220.67530822753906, 96.16702270507812, -18.203872680664062, 61.06285095214844, 220.35067749023438, 329.840087890625, -87.55856323242188, 184.74606323242188, -9.722885131835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000178.npy"}
|
|
{"epoch": 0.26138032305433184, "step": 179, "batch_size": 64, "mean": 81.7546157836914, "std": 139.3387451171875, "min": -276.90814208984375, "p10": -82.68174438476561, "median": 79.09677124023438, "p90": 301.4401031494141, "max": 378.0808410644531, "pos_frac": 0.6875, "sample": [117.49341583251953, 159.2561798095703, 175.60662841796875, 378.0808410644531, 69.07298278808594, -97.2564697265625, 184.13356018066406, 40.456050872802734, -11.969833374023438, 14.884237289428711, 107.12960815429688, 59.2451171875, 47.96484375, 86.8432388305664, -16.90088653564453, 99.33873748779297, -70.06838989257812, 319.83349609375, -36.95682907104492, -50.896453857421875, 76.83326721191406, -87.33580017089844, 352.0744323730469, 23.39710807800293, 221.53932189941406, 127.0008544921875, 156.53297424316406, 306.1080322265625, 169.4183807373047, 35.36833953857422, 80.31194305419922, 302.5817565917969, 342.80975341796875, 100.52835083007812, 79.61267852783203, 128.9009246826172, -36.32373809814453, -15.77023696899414, -1.6111526489257812, 85.23965454101562, 80.26819610595703, -35.43600845336914, 78.58086395263672, -98.55490112304688, 135.44937133789062, 340.5235290527344, -96.89867401123047, 254.0109100341797, 114.37718963623047, -122.91321563720703, -50.39056396484375, 230.0001983642578, 31.561233520507812, 241.28421020507812, 298.7762451171875, 220.6375274658203, -150.86032104492188, 163.9404296875, -61.659156799316406, -71.82228088378906, -276.90814208984375, 19.7913818359375, 24.59370994567871, -58.56298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000179.npy"}
|
|
{"epoch": 0.26284875183553597, "step": 180, "batch_size": 64, "mean": 103.52677917480469, "std": 123.07260131835938, "min": -207.23655700683594, "p10": -27.119700050354005, "median": 88.14494323730469, "p90": 257.01291198730473, "max": 492.6019287109375, "pos_frac": 0.828125, "sample": [95.57996368408203, 166.3060302734375, -45.06721496582031, 3.0006980895996094, -63.500267028808594, 492.6019287109375, -25.412593841552734, 66.40692138671875, 104.04295349121094, 59.953704833984375, 5.9847259521484375, 233.02670288085938, -207.23655700683594, 149.60935974121094, 176.11207580566406, 130.25192260742188, 88.09917449951172, 84.22148132324219, 101.35514068603516, 63.45048522949219, 166.56210327148438, -134.42356872558594, 54.6420783996582, 290.22247314453125, 76.5103530883789, 226.67169189453125, 150.87887573242188, 7.44866943359375, 35.63335418701172, 56.426090240478516, 275.831787109375, 4.171224594116211, 246.56979370117188, 169.54666137695312, 81.2336654663086, -5.588842391967773, 99.68437957763672, 197.02923583984375, -27.153968811035156, 181.9256591796875, 82.95012664794922, 7.017585754394531, 251.91323852539062, 70.253662109375, -124.18565368652344, 86.71806335449219, 1.920684814453125, 259.198486328125, 230.62742614746094, 90.61005401611328, 137.5398712158203, 240.26992797851562, -27.03973960876465, 334.8411865234375, 88.19071197509766, -21.357925415039062, 195.23892211914062, 153.7153778076172, 29.557212829589844, -68.52454376220703, 264.0016784667969, 282.6883544921875, 30.19525909423828, 196.765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000180.npy"}
|
|
{"epoch": 0.2643171806167401, "step": 181, "batch_size": 64, "mean": 64.5262222290039, "std": 114.39883422851562, "min": -200.0332794189453, "p10": -55.17941818237305, "median": 45.94013595581055, "p90": 210.99144287109377, "max": 385.420654296875, "pos_frac": 0.765625, "sample": [129.3859100341797, -54.45880889892578, 15.186830520629883, 63.16606521606445, 110.47936248779297, -2.5374584197998047, 31.070758819580078, 41.579322814941406, 83.65018463134766, 121.06611633300781, -1.6680183410644531, 17.818735122680664, 4.985988616943359, 14.552986145019531, -152.86563110351562, 218.24935913085938, 114.13740539550781, -27.775917053222656, 76.36712646484375, -10.031352996826172, 44.42416000366211, -55.488250732421875, -16.951919555664062, 235.03367614746094, -77.81752014160156, 83.79573822021484, 121.37277221679688, 272.59918212890625, 66.90206146240234, 48.716468811035156, 123.03546905517578, 80.52591705322266, -200.0332794189453, 47.40092468261719, 23.386367797851562, 79.18618774414062, 13.022150039672852, 186.17564392089844, 139.0680694580078, -60.300933837890625, 21.45489501953125, -189.865478515625, 213.61221313476562, -16.835811614990234, 350.75982666015625, 159.80242919921875, -164.33811950683594, 59.160560607910156, -30.049972534179688, 129.68045043945312, 204.87631225585938, 385.420654296875, 143.78884887695312, 29.604000091552734, 132.205322265625, 5.583255767822266, 4.568351745605469, 1.4069747924804688, 289.63995361328125, 43.41579055786133, 29.514976501464844, 183.83969116210938, 151.54153442382812, 44.479347229003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000181.npy"}
|
|
{"epoch": 0.2657856093979442, "step": 182, "batch_size": 64, "mean": 111.48786926269531, "std": 132.6415557861328, "min": -231.09249877929688, "p10": -51.9391086578369, "median": 103.611572265625, "p90": 276.62159118652346, "max": 381.67913818359375, "pos_frac": 0.828125, "sample": [286.1573486328125, 81.75495910644531, 305.8132629394531, 194.29458618164062, 84.64657592773438, 222.96473693847656, 237.43740844726562, 277.561279296875, 348.8660888671875, 75.95851135253906, 133.1105499267578, 48.020198822021484, 27.851762771606445, -18.951824188232422, 256.71612548828125, -231.09249877929688, 221.48336791992188, 66.75335693359375, 4.990255355834961, 44.07472229003906, 381.67913818359375, -0.4015960693359375, 254.9219970703125, 112.90031433105469, 146.01318359375, -58.891326904296875, 241.9791259765625, 94.32283020019531, 21.001346588134766, 71.25712585449219, 148.9384765625, 257.20947265625, -102.9856185913086, -117.94011688232422, 161.15225219726562, 233.83047485351562, 43.401214599609375, 149.03616333007812, 210.43844604492188, 230.61439514160156, -65.92440032958984, 73.36376190185547, 32.38684844970703, 128.17124938964844, 151.4104461669922, 175.5416259765625, 181.61761474609375, 114.91185760498047, -123.45393371582031, 6.441219329833984, 369.4463195800781, 145.4595947265625, 50.00563049316406, 11.86778450012207, -30.798828125, 7.74517822265625, -126.6898422241211, 72.55006408691406, 274.4289855957031, 17.17339324951172, 348.78759765625, -35.71726608276367, 49.27113342285156, 160.33956909179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000182.npy"}
|
|
{"epoch": 0.26725403817914833, "step": 183, "batch_size": 64, "mean": 60.38490676879883, "std": 107.37228393554688, "min": -156.22642517089844, "p10": -73.0094612121582, "median": 57.64152908325195, "p90": 185.45636596679694, "max": 352.64239501953125, "pos_frac": 0.75, "sample": [26.279953002929688, 71.53716278076172, -3.331632614135742, 60.74897766113281, 13.14981460571289, 165.16424560546875, 149.4445037841797, 129.08914184570312, 48.745758056640625, 56.452980041503906, -73.36732482910156, 138.75613403320312, 60.88371658325195, 20.654993057250977, 29.311279296875, 82.08428955078125, 330.76904296875, 242.95220947265625, 140.77891540527344, 31.71368980407715, 65.96634674072266, 112.39863586425781, 287.61370849609375, 99.89974975585938, -12.130149841308594, 4.498983383178711, 70.38134765625, 54.48111343383789, -2.8968658447265625, -156.22642517089844, 168.7781982421875, 79.85186767578125, -55.11128234863281, 13.96314811706543, 172.25454711914062, 58.830078125, 42.288734436035156, 100.43624114990234, 135.66842651367188, 228.54440307617188, -122.29025268554688, -37.82427978515625, 352.64239501953125, -72.17444610595703, 34.6065673828125, -34.78047180175781, 78.52398681640625, -99.21484375, -112.60283660888672, -104.89212036132812, 161.29391479492188, 116.1629638671875, 193.6891326904297, 191.11428833007812, 77.85791015625, 56.34803009033203, 27.483421325683594, -64.95745086669922, 137.67562866210938, 83.69882202148438, -151.10745239257812, -56.456233978271484, 18.357955932617188, 0.1706085205078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000183.npy"}
|
|
{"epoch": 0.2687224669603524, "step": 184, "batch_size": 64, "mean": 63.327796936035156, "std": 109.94463348388672, "min": -143.05380249023438, "p10": -46.01745109558105, "median": 30.798585891723633, "p90": 207.55157623291018, "max": 336.789794921875, "pos_frac": 0.75, "sample": [29.730255126953125, -104.01485443115234, 63.323646545410156, 76.1811294555664, 198.13063049316406, 179.0877685546875, 10.74542236328125, 12.502883911132812, 159.91656494140625, -18.424095153808594, 307.3714599609375, 336.789794921875, 59.02796173095703, 210.34133911132812, -41.901039123535156, 187.05142211914062, 10.5506591796875, 214.611328125, 96.68682861328125, 266.48919677734375, -23.724510192871094, 302.26263427734375, -91.35403442382812, 16.897293090820312, 37.48024368286133, 35.52294158935547, 42.911521911621094, 11.445365905761719, 86.47834777832031, 3.680692672729492, 40.94844055175781, 95.19143676757812, 138.49302673339844, 197.6870574951172, -92.38427734375, 199.96533203125, 15.309890747070312, -25.237754821777344, -35.764034271240234, 25.479564666748047, 73.4583969116211, -20.26306915283203, 29.522499084472656, 9.564310073852539, 15.973182678222656, 201.04212951660156, 66.31122589111328, 55.74960708618164, -52.187652587890625, -127.60385131835938, -47.7816276550293, 25.89459800720215, -5.806125640869141, 4.5623931884765625, -143.05380249023438, 29.327545166015625, -40.54508972167969, 176.5757293701172, 31.86691665649414, 105.75846099853516, 6.721351623535156, -0.9719085693359375, 96.88392639160156, 326.4925842285156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000184.npy"}
|
|
{"epoch": 0.2701908957415565, "step": 185, "batch_size": 64, "mean": 50.67546081542969, "std": 120.80158233642578, "min": -196.05227661132812, "p10": -65.72291679382323, "median": 26.072877883911133, "p90": 198.2263122558594, "max": 425.5668640136719, "pos_frac": 0.625, "sample": [-161.49563598632812, -69.27690124511719, -18.829408645629883, 88.38473510742188, 71.50459289550781, 18.74321174621582, -5.858423233032227, 76.67626953125, 411.9520568847656, -4.998405456542969, 14.949779510498047, 112.70494842529297, 106.9072265625, -5.607654571533203, 253.58657836914062, 34.012725830078125, -9.227216720581055, 32.555564880371094, 138.7235870361328, -108.94839477539062, -9.715593338012695, 35.42025375366211, 25.962055206298828, 68.75840759277344, 74.31787109375, -5.485374450683594, -113.21879577636719, 19.069068908691406, 90.50098419189453, -134.46676635742188, 34.23797607421875, 24.546062469482422, 93.0136947631836, 98.57627868652344, 26.183700561523438, 356.17138671875, -50.955833435058594, 33.75102996826172, 189.62298583984375, -44.4088249206543, 33.143367767333984, 5.428899765014648, -19.780723571777344, -8.639022827148438, -21.39731788635254, 1.3965587615966797, 322.0910949707031, 139.391357421875, 145.97952270507812, -0.2043590545654297, 425.5668640136719, -30.03247833251953, 21.923160552978516, 64.80415344238281, -14.565147399902344, 201.9134521484375, 40.06901550292969, 127.1788330078125, -196.05227661132812, 257.94158935546875, -71.66830444335938, 97.2287368774414, -9.397109985351562, -57.4302864074707], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000185.npy"}
|
|
{"epoch": 0.27165932452276065, "step": 186, "batch_size": 64, "mean": 96.6115951538086, "std": 117.97352600097656, "min": -196.24459838867188, "p10": -22.207040405273435, "median": 74.91999816894531, "p90": 231.19376831054691, "max": 456.15716552734375, "pos_frac": 0.796875, "sample": [-196.24459838867188, 131.54002380371094, 311.1773986816406, 147.75645446777344, 144.13156127929688, -5.7744140625, 75.44625854492188, 48.33295822143555, 209.05130004882812, 156.36495971679688, 68.47994995117188, 218.53314208984375, 3.9484329223632812, 82.35641479492188, 37.04509353637695, -16.48685073852539, 97.04342651367188, -117.24493408203125, 40.2973518371582, 47.72749328613281, 304.012939453125, 43.375118255615234, -1.7076034545898438, 176.16140747070312, 37.659629821777344, 74.39373779296875, 98.53472137451172, 142.96224975585938, 324.06103515625, 102.25994873046875, 180.66543579101562, -23.62577247619629, 159.15878295898438, 66.88156127929688, 34.292076110839844, 127.18905639648438, 395.72650146484375, 236.6197509765625, 46.23013687133789, 5.855319976806641, 158.12396240234375, 30.14066505432129, 73.629638671875, 62.745452880859375, -62.578857421875, 398.23291015625, -19.475997924804688, 74.00260162353516, -23.377487182617188, -42.71644592285156, 456.15716552734375, 158.9173583984375, 68.7431640625, 144.9330596923828, -2.232177734375, 141.04705810546875, 94.75718688964844, -28.901756286621094, 2.4819564819335938, 116.56793212890625, 143.04397583007812, 131.481201171875, -4.273193359375, 97.50531005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000186.npy"}
|
|
{"epoch": 0.27312775330396477, "step": 187, "batch_size": 64, "mean": 80.06629943847656, "std": 107.42435455322266, "min": -154.1185302734375, "p10": -36.58426933288574, "median": 62.09721565246582, "p90": 215.47777404785157, "max": 392.33245849609375, "pos_frac": 0.765625, "sample": [-154.1185302734375, 207.92361450195312, -23.014402389526367, 59.14955520629883, -79.2701416015625, 228.67092895507812, 84.28062438964844, 156.28732299804688, 54.29969024658203, 40.43685531616211, 70.63656616210938, 66.21946716308594, 336.2509765625, 78.4168930053711, 123.03450012207031, 207.31219482421875, 161.2300567626953, 62.37998962402344, 115.44136047363281, -42.88249588012695, 57.44728088378906, 35.462486267089844, 392.33245849609375, -34.906829833984375, 141.83004760742188, 57.16276550292969, 248.22140502929688, -36.656917572021484, 188.2895050048828, 39.664337158203125, -45.8331298828125, 82.08238983154297, 20.69286346435547, -61.828887939453125, -36.23120880126953, 159.05499267578125, -36.414756774902344, 5.838750839233398, -10.926490783691406, 214.06634521484375, 148.90049743652344, 350.00701904296875, 95.95763397216797, 43.15385437011719, 66.15655517578125, 34.66632843017578, 61.8144416809082, 45.607208251953125, -128.00479125976562, 38.08802032470703, 98.6147232055664, -2.6990280151367188, 154.75802612304688, 102.31008911132812, 75.91944885253906, 185.90818786621094, 86.75048828125, -0.49114227294921875, 59.825286865234375, 228.1158905029297, 216.08267211914062, 35.04627990722656, -20.876020431518555, 16.5991153717041], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000187.npy"}
|
|
{"epoch": 0.2745961820851689, "step": 188, "batch_size": 64, "mean": 98.17579650878906, "std": 121.02018737792969, "min": -131.13772583007812, "p10": -18.280582809448237, "median": 70.97017669677734, "p90": 289.22151489257817, "max": 494.649658203125, "pos_frac": 0.828125, "sample": [330.3936462402344, 123.80902862548828, 243.6399383544922, 69.72344970703125, 72.2121353149414, 0.061370849609375, -34.012176513671875, -115.54505157470703, 294.2806396484375, 81.97766876220703, 48.32228088378906, -27.588645935058594, 119.69171905517578, 78.85966491699219, 291.9649658203125, 253.3543243408203, 87.82592010498047, -32.284568786621094, 57.03125, 7.748077392578125, 3.6935577392578125, 28.324302673339844, 45.90423583984375, 143.01486206054688, 69.7047348022461, 31.046348571777344, 11.127578735351562, -4.248256683349609, 317.03656005859375, 46.40791320800781, 359.76513671875, -131.13772583007812, 282.82012939453125, 114.84944152832031, 149.5623779296875, 114.85903930664062, 66.17083740234375, 188.49884033203125, -10.863096237182617, 188.4698486328125, 165.96983337402344, 25.530166625976562, -88.99591064453125, -20.82729148864746, 192.2804718017578, 39.693939208984375, 219.9013671875, 42.02122116088867, 6.2198333740234375, 337.435546875, 87.39908599853516, 166.85888671875, 53.01558303833008, -11.567419052124023, 157.2728729248047, 81.52790832519531, 101.9923324584961, 494.649658203125, 94.94357299804688, 69.72821807861328, 74.48086547851562, -12.338262557983398, 22.19916534423828, 17.386972427368164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000188.npy"}
|
|
{"epoch": 0.27606461086637296, "step": 189, "batch_size": 64, "mean": 79.58168029785156, "std": 134.24612426757812, "min": -180.94384765625, "p10": -68.00683135986327, "median": 60.598785400390625, "p90": 195.78351745605468, "max": 562.9107055664062, "pos_frac": 0.765625, "sample": [40.69878387451172, 171.99090576171875, -123.34855651855469, 356.2100830078125, -70.20286560058594, -53.29698944091797, 45.856842041015625, 9.914894104003906, -3.5176639556884766, 33.042236328125, 153.4758758544922, 149.10980224609375, 141.29534912109375, -8.07830810546875, 162.01205444335938, 44.208648681640625, 195.8895263671875, 43.20283889770508, -180.94384765625, 6.101541519165039, -83.64017486572266, 26.6498966217041, 80.48919677734375, 185.5062255859375, 147.3994598388672, 81.1781005859375, -48.890289306640625, 55.09982681274414, 34.57619857788086, 182.90380859375, 1.9068946838378906, 19.74953269958496, 95.63243103027344, 488.61199951171875, 57.997467041015625, -25.943588256835938, 23.227088928222656, 217.11952209472656, 76.08686828613281, 10.272499084472656, -142.41481018066406, 119.4594955444336, -62.88275146484375, 120.35289764404297, 63.978939056396484, 338.33660888671875, 181.5255584716797, 562.9107055664062, -19.474464416503906, 125.63345336914062, 169.95726013183594, 30.78662872314453, 81.16676330566406, 150.02590942382812, 105.91017150878906, 195.53616333007812, 250.75595092773438, 88.74606323242188, -2.1633453369140625, -174.4017333984375, 63.200103759765625, -82.55455780029297, 29.116424560546875, 160.1660919189453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000189.npy"}
|
|
{"epoch": 0.2775330396475771, "step": 190, "batch_size": 64, "mean": 95.53245544433594, "std": 142.71243286132812, "min": -218.02952575683594, "p10": -68.1217170715332, "median": 69.45353698730469, "p90": 309.6427673339844, "max": 432.3765563964844, "pos_frac": 0.796875, "sample": [-3.9917755126953125, 107.77558135986328, 191.14181518554688, 159.1076202392578, 14.684913635253906, -91.92427825927734, 247.88458251953125, 41.29845428466797, 13.444751739501953, 365.0323486328125, 72.07339477539062, 67.76349639892578, 134.8141326904297, 18.354324340820312, 374.1639404296875, 149.94143676757812, 106.5567626953125, 89.16999816894531, 195.63473510742188, 23.35308837890625, 3.3092269897460938, 265.5447082519531, -218.02952575683594, 71.1435775756836, 275.11846923828125, -23.90802001953125, 62.162139892578125, 334.036376953125, 156.64495849609375, 57.99998474121094, -14.829421997070312, 311.8975830078125, 20.283843994140625, -3.7573413848876953, 135.34625244140625, 72.63236999511719, -102.12032318115234, -68.57508850097656, -158.56195068359375, 15.749404907226562, 100.3402328491211, -67.06385040283203, 109.40360260009766, 44.59169006347656, -27.883302688598633, -95.02525329589844, 115.70326232910156, 304.38153076171875, 51.511749267578125, 55.13066864013672, -199.1580047607422, 398.810302734375, 126.04751586914062, 19.437204360961914, 204.8014678955078, 107.51819610595703, 64.11575317382812, 387.943115234375, 4.896757125854492, 432.3765563964844, 201.91744995117188, 225.9712371826172, 37.615840911865234, 42.32672119140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000190.npy"}
|
|
{"epoch": 0.2790014684287812, "step": 191, "batch_size": 64, "mean": 138.17645263671875, "std": 154.89373779296875, "min": -146.46664428710938, "p10": -6.250457382202148, "median": 112.46092987060547, "p90": 322.95994567871094, "max": 584.6844482421875, "pos_frac": 0.859375, "sample": [481.4455871582031, 107.3055419921875, 584.6844482421875, 30.77960205078125, 307.28436279296875, 61.58543395996094, 144.53195190429688, 262.2823486328125, 14.843358993530273, 376.92462158203125, 107.46499633789062, 159.6817626953125, -6.5591583251953125, 271.3479919433594, 287.2579345703125, 20.973426818847656, 38.04692840576172, 80.7034683227539, 25.198287963867188, 71.00633239746094, 264.15240478515625, 56.45721435546875, 42.64434814453125, 304.2054443359375, 123.92888641357422, 11.349533081054688, 8.838804244995117, 568.5394287109375, 188.4965362548828, -19.035255432128906, 231.49215698242188, -79.42181396484375, 17.12847137451172, -49.263240814208984, 145.33517456054688, 178.17071533203125, 185.8934326171875, 148.49087524414062, 325.84088134765625, 32.06535339355469, 9.188676834106445, 243.59078979492188, 227.69125366210938, -146.46664428710938, 68.78450012207031, 508.9999084472656, 41.993499755859375, -70.22267150878906, 120.18775939941406, 96.24967956542969, 190.42051696777344, 316.2377624511719, 20.61725616455078, -5.530155181884766, 126.09315490722656, 176.37115478515625, -76.12654876708984, 117.45686340332031, -4.1428375244140625, 125.47034454345703, 361.66845703125, 32.98986053466797, 83.4040756225586, 166.26806640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000191.npy"}
|
|
{"epoch": 0.28046989720998533, "step": 192, "batch_size": 64, "mean": 91.24540710449219, "std": 148.73919677734375, "min": -263.3235168457031, "p10": -53.35022964477539, "median": 66.03786659240723, "p90": 315.03936157226565, "max": 542.9531860351562, "pos_frac": 0.734375, "sample": [-44.33063507080078, -149.77041625976562, 42.01300811767578, 170.2860107421875, 133.8136749267578, -36.564659118652344, -54.33995819091797, -5.062063217163086, 37.471160888671875, -263.3235168457031, 23.029296875, 92.88152313232422, -33.643951416015625, 224.16880798339844, 18.605819702148438, 42.66139221191406, -71.8460693359375, 95.2376708984375, -51.040863037109375, 176.0959930419922, -120.68192291259766, 417.2904357910156, -46.3162727355957, 77.67304992675781, -19.375350952148438, 319.548095703125, 361.18414306640625, 383.2453918457031, 156.49667358398438, 358.95367431640625, 96.329833984375, 73.58369445800781, 542.9531860351562, 100.48666381835938, 179.8341827392578, 28.077585220336914, 115.41014099121094, 217.6685028076172, 274.6590881347656, 62.92808151245117, 47.86183166503906, 304.51898193359375, 7.946741104125977, 42.14353942871094, 128.470458984375, 50.937034606933594, 62.493202209472656, -176.63990783691406, 200.70492553710938, 69.14765167236328, 81.40384674072266, -8.8026123046875, -45.385169982910156, -58.30371856689453, 12.786458969116211, 232.80172729492188, 75.6220703125, -28.359329223632812, 41.147430419921875, 208.33560180664062, 145.28799438476562, 124.18838500976562, 59.06910705566406, 336.0389709472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000192.npy"}
|
|
{"epoch": 0.28193832599118945, "step": 193, "batch_size": 64, "mean": 53.04465103149414, "std": 141.73948669433594, "min": -424.73858642578125, "p10": -79.41808624267577, "median": 51.5461540222168, "p90": 221.7535079956055, "max": 371.7496337890625, "pos_frac": 0.671875, "sample": [112.43316650390625, -20.19493865966797, 25.823753356933594, 186.18466186523438, -424.73858642578125, 95.43633270263672, 71.90680694580078, 54.47020721435547, -423.5020751953125, 302.9881591796875, 65.97174072265625, -16.001379013061523, 184.99310302734375, -37.23275375366211, -25.462860107421875, 97.23776245117188, 110.15912628173828, 128.11676025390625, -0.42052459716796875, 226.1290283203125, 2.8729019165039062, -19.828697204589844, 12.649616241455078, 159.2628173828125, 28.91145133972168, 128.50387573242188, -56.00136947631836, -40.192298889160156, 8.411270141601562, -61.08625793457031, -22.419517517089844, 211.54396057128906, -83.60662841796875, -90.5306167602539, 250.29055786132812, -129.14608764648438, 178.70846557617188, -89.08113861083984, 13.196191787719727, 48.622100830078125, 10.1116943359375, 141.54135131835938, 266.1632995605469, 169.9881591796875, 116.1536865234375, -133.73330688476562, 318.85467529296875, 79.81333923339844, 123.6581802368164, 123.60895538330078, -65.39154815673828, 88.45547485351562, 17.585590362548828, -69.64482116699219, -39.41001892089844, 143.54327392578125, 371.7496337890625, 127.62269592285156, -60.048831939697266, 21.41155242919922, 283.32745361328125, 76.42688751220703, 15.353057861328125, 102.33917999267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000193.npy"}
|
|
{"epoch": 0.2834067547723935, "step": 194, "batch_size": 64, "mean": 86.09469604492188, "std": 99.99124145507812, "min": -109.02793884277344, "p10": -51.782063293457014, "median": 71.0604019165039, "p90": 235.417431640625, "max": 294.2304992675781, "pos_frac": 0.8125, "sample": [142.46517944335938, 20.214820861816406, 33.404380798339844, 5.4705352783203125, 122.33161926269531, 271.43853759765625, 103.50717163085938, 166.39317321777344, -23.075775146484375, 19.45440673828125, 28.351213455200195, 247.20355224609375, -66.87322998046875, -64.93814086914062, 43.33625030517578, 61.69921112060547, 214.11471557617188, 53.333412170410156, 68.12171936035156, 131.61660766601562, 33.523502349853516, 80.10419464111328, 14.293338775634766, 6.601016998291016, 117.2780990600586, -83.1103515625, -2.531320571899414, 229.42074584960938, 66.24735260009766, 79.62589263916016, 242.9126434326172, 131.77206420898438, 235.5964813232422, 169.48190307617188, 29.135047912597656, 276.585693359375, 64.03263854980469, 29.28849220275879, 147.1497039794922, 48.09377670288086, 188.01280212402344, -75.27407836914062, 47.057373046875, -11.437248229980469, 103.2247314453125, 160.7125244140625, -3.2206153869628906, -109.02793884277344, 34.014183044433594, -57.46112060546875, 113.70939636230469, -62.792457580566406, 60.58634948730469, 234.99964904785156, 76.49288940429688, 294.2304992675781, 133.66189575195312, 73.99908447265625, 290.69384765625, 92.79360961914062, 174.36801147460938, 181.56292724609375, 114.61465454101562, -38.53092956542969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000194.npy"}
|
|
{"epoch": 0.28487518355359764, "step": 195, "batch_size": 64, "mean": 93.46810913085938, "std": 118.85218811035156, "min": -131.37991333007812, "p10": -52.70166511535645, "median": 76.18124389648438, "p90": 241.66904449462893, "max": 398.1551818847656, "pos_frac": 0.78125, "sample": [83.13178253173828, -21.19281005859375, 215.3645477294922, 52.48524856567383, 74.7281494140625, 398.1551818847656, 71.57465362548828, 337.81787109375, 204.5836181640625, -7.218915939331055, -7.87860107421875, 48.082550048828125, -3.3397903442382812, -101.38612365722656, 244.01858520507812, 114.48005676269531, 55.99317932128906, 236.18678283691406, -43.56652069091797, 351.46881103515625, -53.0106315612793, 108.73912048339844, -102.31790161132812, 36.0860595703125, 104.89869689941406, 108.11522674560547, 226.91995239257812, 301.21990966796875, 142.6515655517578, 121.85945129394531, 150.4920654296875, -83.40363311767578, 71.51058197021484, 180.85128784179688, 18.201019287109375, 142.9118194580078, -33.044960021972656, 8.749259948730469, 171.900146484375, 220.3597869873047, -131.37991333007812, 245.06765747070312, 47.52457046508789, 68.50247955322266, 385.0509948730469, 141.97610473632812, 80.57831573486328, 62.33882141113281, -59.909706115722656, 11.526487350463867, 164.12771606445312, -71.60025024414062, 221.72821044921875, 106.66729736328125, 31.188888549804688, 77.63433837890625, 30.695571899414062, 63.20360565185547, 18.7231502532959, 86.312255859375, -51.980743408203125, 49.58517837524414, 142.70034790039062, 114.52061462402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000195.npy"}
|
|
{"epoch": 0.28634361233480177, "step": 196, "batch_size": 64, "mean": 105.74667358398438, "std": 144.51861572265625, "min": -217.51046752929688, "p10": -57.22457275390625, "median": 80.21939468383789, "p90": 309.51323852539065, "max": 457.20098876953125, "pos_frac": 0.765625, "sample": [190.99072265625, 198.795654296875, 307.64202880859375, 217.5844268798828, -58.02997589111328, 291.6065673828125, 310.315185546875, 444.84796142578125, 226.31301879882812, 69.62086486816406, 4.050144195556641, 52.15034484863281, 286.2071533203125, 190.78500366210938, 48.965057373046875, 10.385286331176758, -26.476512908935547, 87.70463562011719, -6.9410400390625, 51.45011901855469, 439.91680908203125, -18.989681243896484, 62.24229431152344, 31.197711944580078, 127.16987609863281, 121.88878631591797, 123.12541198730469, 140.26632690429688, 45.454864501953125, -10.836837768554688, 112.45042419433594, 78.55672454833984, 439.9766845703125, 457.20098876953125, -20.448448181152344, 179.6512908935547, 83.82258605957031, 56.55125427246094, 111.87152099609375, 81.88206481933594, 65.5568618774414, 57.509429931640625, 222.83462524414062, -2.8761539459228516, 134.03436279296875, 32.719425201416016, 83.46504211425781, -151.9102325439453, 69.04319763183594, -217.51046752929688, -62.7781982421875, 151.95321655273438, 90.10646057128906, -63.468265533447266, 316.930908203125, -55.345298767089844, -136.4858856201172, 69.27742004394531, -3.1411285400390625, 127.96891021728516, 259.0711975097656, 311.0126953125, -76.93365478515625, 5.835441589355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000196.npy"}
|
|
{"epoch": 0.2878120411160059, "step": 197, "batch_size": 64, "mean": 80.89613342285156, "std": 113.04435729980469, "min": -123.44822692871094, "p10": -35.94133148193359, "median": 53.378299713134766, "p90": 232.82198028564457, "max": 400.571044921875, "pos_frac": 0.796875, "sample": [57.39115905761719, 330.1175231933594, 139.1900634765625, -19.554473876953125, 164.53549194335938, 10.561019897460938, 16.870285034179688, -65.54133605957031, 66.10052490234375, 400.571044921875, 354.4127502441406, 225.08189392089844, 66.74774932861328, 122.7001953125, -102.37041473388672, -0.18047332763671875, 123.85791778564453, 275.17803955078125, 24.001365661621094, 41.9847526550293, 100.15231323242188, 210.6070098876953, 42.40727615356445, -1.17047119140625, 53.88129425048828, 7.470735549926758, -105.08324432373047, 131.20208740234375, 84.53754425048828, 95.11041259765625, 259.95361328125, 139.7333221435547, 52.87530517578125, 236.13916015625, 2.019927978515625, 191.7805938720703, -37.791717529296875, 2.9177703857421875, -77.75411987304688, -123.44822692871094, 32.442893981933594, 123.5970687866211, -0.8019237518310547, 7.602485656738281, 26.555448532104492, 77.28762817382812, 305.81158447265625, 200.99856567382812, 23.950546264648438, 189.3396453857422, -11.35649299621582, 49.005001068115234, 82.82758331298828, 27.45709991455078, 23.832359313964844, 103.38240814208984, 168.44126892089844, 75.36032104492188, 199.23782348632812, -31.623764038085938, -82.271240234375, 17.929542541503906, 20.292694091796875, 50.85845947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000197.npy"}
|
|
{"epoch": 0.28928046989721, "step": 198, "batch_size": 64, "mean": 103.20319366455078, "std": 141.6293182373047, "min": -224.87091064453125, "p10": -28.538953590393064, "median": 86.55558395385742, "p90": 231.76798400878909, "max": 584.1566162109375, "pos_frac": 0.828125, "sample": [-120.5759506225586, 91.83274841308594, -36.399742126464844, 512.8724365234375, 123.81925964355469, 203.1912841796875, 321.99603271484375, 54.98476791381836, 146.11817932128906, 53.38854217529297, 125.42947387695312, 165.12246704101562, -147.47622680664062, 4.412055969238281, 2.14093017578125, 234.79612731933594, 142.0828857421875, 102.12704467773438, 80.89007568359375, 86.5596694946289, -29.36420249938965, -26.613372802734375, 34.86857604980469, 161.15626525878906, 11.553976058959961, 71.16146087646484, -71.10982513427734, 197.33819580078125, 86.55149841308594, 76.25767517089844, 90.78888702392578, 122.44953155517578, 127.590576171875, 76.68214416503906, 127.21989440917969, 13.919925689697266, 79.29566955566406, 159.14456176757812, 153.440185546875, 225.11685180664062, -224.87091064453125, 545.6815185546875, 98.5829086303711, 65.06745147705078, -0.00878143310546875, 6.2953948974609375, 234.61846923828125, -41.41400146484375, 3.658781051635742, 69.94960021972656, 32.8546257019043, 88.0877685546875, 86.18878173828125, 135.33511352539062, -2.661661148071289, 143.05462646484375, 147.0585479736328, 204.60549926757812, 13.888124465942383, 402.84967041015625, -5.9656982421875, 584.1566162109375, 46.55902099609375, 136.67257690429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000198.npy"}
|
|
{"epoch": 0.2907488986784141, "step": 199, "batch_size": 64, "mean": 102.95765686035156, "std": 139.86825561523438, "min": -168.65264892578125, "p10": -61.64832725524901, "median": 86.84391784667969, "p90": 280.63388671875003, "max": 454.39599609375, "pos_frac": 0.78125, "sample": [207.07516479492188, 173.09376525878906, -135.30117797851562, -9.007553100585938, 270.804443359375, 74.35324096679688, -68.33245849609375, 197.4844207763672, -164.93109130859375, 116.79900360107422, -2.6178054809570312, -79.60348510742188, -168.65264892578125, 214.99098205566406, 200.93972778320312, 70.80547332763672, 14.364585876464844, 137.08590698242188, 384.9705810546875, 41.59986114501953, 85.75202941894531, 168.09327697753906, 376.9091796875, 294.895263671875, -92.67242431640625, 40.81349182128906, -43.930946350097656, 122.88787078857422, 206.18753051757812, 24.42071533203125, 34.68223571777344, 252.2763671875, 454.39599609375, 81.79742431640625, 70.39264678955078, 199.46530151367188, -24.013671875, 113.05864715576172, 25.989105224609375, 90.58512878417969, 61.7333984375, 277.0953063964844, 133.92404174804688, 38.30745315551758, 282.1504211425781, 117.05638885498047, 69.92652893066406, -43.02685546875, -46.05202102661133, -159.7952423095703, 264.11285400390625, 80.2431411743164, 101.5732192993164, 91.24724578857422, 351.39617919921875, 179.6820526123047, 5.918703079223633, 88.94109344482422, 54.59117889404297, 87.93580627441406, 45.71702575683594, 143.84683227539062, 424.68817138671875, -19.829147338867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000199.npy"}
|
|
{"epoch": 0.2922173274596182, "step": 200, "batch_size": 64, "mean": 102.1077880859375, "std": 155.07470703125, "min": -309.08270263671875, "p10": -45.22786865234374, "median": 78.93537902832031, "p90": 323.9662536621094, "max": 537.7470703125, "pos_frac": 0.796875, "sample": [232.94952392578125, 106.01602172851562, 59.93587112426758, 403.5652770996094, 62.12794876098633, 153.434814453125, 17.35187530517578, 2.2430343627929688, 401.763427734375, -91.04010009765625, 316.8278503417969, 234.36700439453125, 100.41191864013672, 6.989536285400391, 73.78703308105469, 321.85723876953125, 208.67527770996094, 373.521240234375, 5.807769775390625, 324.8701171875, 68.431640625, 2.0669689178466797, 154.29774475097656, 215.62281799316406, -21.87081527709961, 74.01066589355469, 186.306396484375, -189.45126342773438, 537.7470703125, 87.15550994873047, 101.59793090820312, -27.69162368774414, -5.3050384521484375, 74.42782592773438, 51.88117980957031, -33.49090576171875, -309.08270263671875, 7.604705810546875, 99.91448974609375, 340.0821838378906, 5.50526237487793, 90.2611312866211, 57.89837646484375, -2.1691665649414062, 0.046146392822265625, -155.20877075195312, 83.44293212890625, -50.25799560546875, 291.2373352050781, -30.473485946655273, 4.185600280761719, 410.07940673828125, 46.54130554199219, 24.06275177001953, 98.80625915527344, 255.4284210205078, 158.11956787109375, 163.743408203125, 103.11872863769531, 169.10037231445312, -122.69386291503906, 144.47105407714844, -65.6595458984375, 125.5959701538086], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000200.npy"}
|
|
{"epoch": 0.2936857562408223, "step": 201, "batch_size": 64, "mean": 101.78836059570312, "std": 144.7853240966797, "min": -285.1991271972656, "p10": -42.69153900146483, "median": 80.66946411132812, "p90": 285.44678039550786, "max": 488.533203125, "pos_frac": 0.8125, "sample": [271.12371826171875, 18.071897506713867, 215.5498809814453, -227.4378662109375, 52.942588806152344, -92.52766418457031, 8.548725128173828, 14.470579147338867, -27.77618408203125, 85.00717163085938, 2.693634033203125, 27.308746337890625, 36.5526123046875, -28.385833740234375, 463.7787170410156, 106.72628021240234, 70.03189086914062, 105.5915298461914, 24.379608154296875, 235.63961791992188, 353.2997131347656, 127.51961517333984, 5.117767333984375, 93.42268371582031, 216.06253051757812, 264.5859069824219, -77.11985778808594, 220.0215606689453, 194.06045532226562, -13.51716423034668, 76.33175659179688, 122.614501953125, 121.40563201904297, 118.54637145996094, -34.1278076171875, -68.40798950195312, 176.34423828125, 71.0750961303711, 50.68329620361328, 488.533203125, 104.7509765625, -24.979515075683594, 160.72076416015625, 75.4656982421875, 46.99433898925781, 167.83912658691406, 145.18563842773438, 252.19398498535156, 388.6724548339844, 68.84805297851562, 86.6653823852539, -64.22894287109375, 143.68141174316406, 69.8940658569336, -285.1991271972656, 294.86614990234375, 412.2462463378906, 140.86419677734375, 12.805719375610352, -46.36170959472656, 291.5852355957031, 44.940025329589844, 12.860671997070312, 145.40298461914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000201.npy"}
|
|
{"epoch": 0.29515418502202645, "step": 202, "batch_size": 64, "mean": 71.75799560546875, "std": 142.43643188476562, "min": -213.11941528320312, "p10": -97.53666763305662, "median": 61.881126403808594, "p90": 256.46564025878905, "max": 526.05322265625, "pos_frac": 0.703125, "sample": [526.05322265625, -51.64067459106445, 93.32756805419922, 305.93524169921875, 2.0363407135009766, -49.02562713623047, 235.52850341796875, -46.72687530517578, 122.58370971679688, 93.53215026855469, 50.134342193603516, -192.13429260253906, 174.450927734375, 241.01885986328125, 211.52828979492188, 47.046661376953125, -202.46267700195312, 47.716522216796875, 105.07058715820312, 340.997802734375, 116.85680389404297, 282.05670166015625, -136.42649841308594, 186.93353271484375, 26.711212158203125, 29.908912658691406, 390.59912109375, -16.324766159057617, 105.82981872558594, 133.86814880371094, 101.37223815917969, 125.84597778320312, 255.50662231445312, -31.222396850585938, -38.759674072265625, -118.11332702636719, 81.89356994628906, 60.384613037109375, 2.2354965209960938, 231.84686279296875, 99.94755554199219, -8.198989868164062, 23.156383514404297, 265.2305603027344, 152.17117309570312, 114.00845336914062, 80.52568817138672, -213.11941528320312, 63.37763977050781, 99.4615478515625, 16.70828628540039, 41.55596160888672, -55.32872009277344, -67.02851104736328, 35.52197265625, -107.68321228027344, 82.48626708984375, -73.86139678955078, -50.13321304321289, 53.289146423339844, 256.87664794921875, -131.19595336914062, 89.9765625, -21.206707000732422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000202.npy"}
|
|
{"epoch": 0.2966226138032305, "step": 203, "batch_size": 64, "mean": 87.42697143554688, "std": 122.77902221679688, "min": -187.09182739257812, "p10": -65.85315475463867, "median": 71.69087982177734, "p90": 246.66162109375003, "max": 453.85174560546875, "pos_frac": 0.78125, "sample": [17.479084014892578, 132.01870727539062, 158.966796875, 63.333351135253906, -28.214168548583984, 280.5261535644531, -74.66671752929688, 134.28631591796875, 27.271718978881836, 113.94326782226562, 0.8704071044921875, 104.46146392822266, 104.33635711669922, 160.77218627929688, -106.54536437988281, 182.7406768798828, 123.81341552734375, -23.046524047851562, 181.8553009033203, 135.5670623779297, 271.969970703125, 414.781494140625, -71.12776184082031, 114.98870849609375, 128.48284912109375, 37.36846923828125, 453.85174560546875, 11.483955383300781, -91.07218933105469, 145.10745239257812, -3.0806045532226562, 168.59976196289062, 183.9542236328125, 265.4671630859375, 21.846435546875, 183.3017120361328, 80.54850006103516, 102.96720123291016, 73.35491943359375, 122.35472869873047, 28.3770751953125, 66.54853057861328, 40.414634704589844, 239.50418090820312, -53.545738220214844, -187.09182739257812, -52.23500061035156, 70.02684020996094, -30.768144607543945, 8.028156280517578, 162.5260009765625, 15.833503723144531, 19.770626068115234, 47.23042297363281, -75.181396484375, -77.20587158203125, 138.88967895507812, 54.27809143066406, 249.72909545898438, 336.4516296386719, -9.897058486938477, 238.9939727783203, 34.674598693847656, 25.056259155273438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000203.npy"}
|
|
{"epoch": 0.29809104258443464, "step": 204, "batch_size": 64, "mean": 81.47257995605469, "std": 144.63739013671875, "min": -233.6527099609375, "p10": -71.2285934448242, "median": 70.60073852539062, "p90": 309.64455566406264, "max": 473.2161865234375, "pos_frac": 0.703125, "sample": [207.6559295654297, 73.87043762207031, 44.07413864135742, 43.17179870605469, 120.20130157470703, -15.734634399414062, 221.08917236328125, -106.7759017944336, 86.53588104248047, 17.475854873657227, -39.00509262084961, 22.064733505249023, 13.67706298828125, 84.52071380615234, 56.00592041015625, -42.413917541503906, -233.6527099609375, 164.93759155273438, 50.94941711425781, 7.817678451538086, 280.32769775390625, -127.15821075439453, -25.121932983398438, -9.344432830810547, 77.8043212890625, -16.49081802368164, 72.75645446777344, 150.2292022705078, 153.56866455078125, -81.26924133300781, 396.5112609863281, -12.720527648925781, 75.89002990722656, 32.378929138183594, -47.8004150390625, 145.0404815673828, 55.03523254394531, 155.06185913085938, 431.0225830078125, -201.3225555419922, 322.20892333984375, 167.05247497558594, 164.17697143554688, 75.03558349609375, 92.22671508789062, -10.705469131469727, 43.22821044921875, 223.2478790283203, 327.7889099121094, -9.94171142578125, 85.0364990234375, 160.10723876953125, -121.18893432617188, 68.44502258300781, 473.2161865234375, 367.00225830078125, -37.81804656982422, 142.80892944335938, 23.990694046020508, 355.6167907714844, -125.3470458984375, 86.22834014892578, 108.43948364257812, -47.474403381347656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000204.npy"}
|
|
{"epoch": 0.29955947136563876, "step": 205, "batch_size": 64, "mean": 107.37113952636719, "std": 141.7747802734375, "min": -262.6477966308594, "p10": -74.80586776733398, "median": 110.17658996582031, "p90": 273.14307250976566, "max": 396.8501892089844, "pos_frac": 0.765625, "sample": [35.83800506591797, 138.63632202148438, 222.6278076171875, 365.7477111816406, 116.88506317138672, -1.690786361694336, -7.287073135375977, 24.103363037109375, 8.894096374511719, 234.69375610351562, 256.84490966796875, -94.3277587890625, -98.09886169433594, 94.22338104248047, -16.957618713378906, 73.43264770507812, 16.63750457763672, 371.0246276855469, 227.2021942138672, 27.553104400634766, 103.4681167602539, 97.6956787109375, -144.56356811523438, 172.65110778808594, 304.97467041015625, 242.68508911132812, 195.88375854492188, -4.582798004150391, -75.6966323852539, 212.98483276367188, -48.619754791259766, 387.8525085449219, 127.62445068359375, 84.06195831298828, 41.61664581298828, 123.73239135742188, 396.8501892089844, 267.52764892578125, 117.80912780761719, 133.1713104248047, -211.20840454101562, 144.69390869140625, 117.94393157958984, 239.4132843017578, 151.93045043945312, -72.7274169921875, 146.56942749023438, 90.12027740478516, 275.5496826171875, 78.4461898803711, 240.1511688232422, 191.73121643066406, 170.07203674316406, 68.77952575683594, 146.4585723876953, 366.2047424316406, 102.2500991821289, -11.571968078613281, 16.221450805664062, -262.6477966308594, -76.58341979980469, 215.41055297851562, -31.1246337890625, 42.56117248535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000205.npy"}
|
|
{"epoch": 0.3010279001468429, "step": 206, "batch_size": 64, "mean": 117.07075500488281, "std": 128.15878295898438, "min": -244.322265625, "p10": -37.4110345840454, "median": 104.99200820922852, "p90": 290.76638793945324, "max": 403.7200927734375, "pos_frac": 0.84375, "sample": [47.6864013671875, 192.3782958984375, 97.04057312011719, 123.73007202148438, 403.7200927734375, 301.888671875, 84.28556823730469, 50.707801818847656, 220.9821014404297, 248.5625457763672, 57.15812683105469, 131.25758361816406, 9.407964706420898, 104.88671112060547, -244.322265625, 88.61737060546875, 78.59809875488281, 166.39768981933594, 60.703575134277344, -29.135671615600586, 217.59390258789062, -40.957618713378906, 227.09024047851562, 356.3891906738281, 115.12245178222656, 193.28814697265625, 24.991653442382812, 204.2659454345703, -100.20640563964844, -110.4993667602539, 195.7232208251953, 305.3233642578125, 212.46502685546875, 223.05076599121094, 13.418581008911133, 244.44235229492188, 80.07279205322266, -74.34521484375, 155.41085815429688, -141.42027282714844, 48.058067321777344, 216.30410766601562, 12.97439956665039, 163.1947021484375, 80.41719818115234, -11.6385498046875, 116.29451751708984, 87.85832977294922, 196.5759735107422, 92.01690673828125, -64.00833892822266, 6.060935974121094, 102.45491790771484, 189.056884765625, 363.8929443359375, 264.81439208984375, -3.624208450317383, 121.25949096679688, 105.09730529785156, 307.510009765625, 64.47785186767578, 25.460466384887695, 315.31341552734375, 196.93527221679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000206.npy"}
|
|
{"epoch": 0.302496328928047, "step": 207, "batch_size": 64, "mean": 136.12110900878906, "std": 158.26390075683594, "min": -133.196533203125, "p10": -30.548121643066402, "median": 89.42457580566406, "p90": 314.4951293945313, "max": 596.5138549804688, "pos_frac": 0.84375, "sample": [25.076824188232422, -133.196533203125, 64.12458038330078, 98.76174926757812, 179.48233032226562, 46.5146484375, 319.71612548828125, -32.499046325683594, -13.552093505859375, 14.568344116210938, 82.04205322265625, 83.94928741455078, 302.31280517578125, -9.575729370117188, 129.5380859375, 170.22425842285156, 49.24919891357422, 90.49520874023438, 35.970706939697266, 34.46246337890625, 52.95255661010742, 175.903564453125, 31.87274932861328, 255.8198699951172, 28.279617309570312, -70.08810424804688, 300.7051086425781, 61.75733947753906, 34.661705017089844, -51.40916061401367, 281.0083923339844, 260.752197265625, -61.91729736328125, 0.27335357666015625, 28.43608856201172, 273.0008850097656, 35.01214599609375, 237.8317413330078, 155.41189575195312, 251.82777404785156, 265.8712158203125, 125.22665405273438, 105.7276611328125, 136.31610107421875, 267.4071044921875, 246.19784545898438, 80.36451721191406, 388.5614013671875, 518.6900634765625, 287.042236328125, 48.20018005371094, 448.28045654296875, -43.15746307373047, 43.428810119628906, 596.5138549804688, 42.013763427734375, 96.06954956054688, 97.13888549804688, 88.35394287109375, 534.4915771484375, -92.42047882080078, 191.20849609375, 446.4629821777344, -25.99596405029297], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000207.npy"}
|
|
{"epoch": 0.3039647577092511, "step": 208, "batch_size": 64, "mean": 105.91764831542969, "std": 156.5018310546875, "min": -351.854736328125, "p10": -41.703514862060544, "median": 81.09241485595703, "p90": 310.95688476562503, "max": 444.2874755859375, "pos_frac": 0.78125, "sample": [-206.7952117919922, 144.9790802001953, -148.91407775878906, 291.79144287109375, 20.07177734375, -20.53079605102539, 371.997314453125, 33.55540466308594, 108.35921478271484, 249.55999755859375, 251.37025451660156, 61.544105529785156, 80.80319213867188, 174.24827575683594, 213.17123413085938, 57.95378875732422, 444.2874755859375, 15.993621826171875, 422.61676025390625, 126.90716552734375, -3.984344482421875, 27.89954376220703, 246.42739868164062, 170.551025390625, 81.38163757324219, 26.92113494873047, 52.24371337890625, 5.012908935546875, -129.2196502685547, 281.3805236816406, -135.4814910888672, -10.502490997314453, 49.440128326416016, 110.72417449951172, 138.32962036132812, 302.040771484375, 282.22705078125, 39.5037956237793, -25.732017517089844, 237.70700073242188, -42.596527099609375, 68.23512268066406, 25.43865203857422, 341.718994140625, 131.67230224609375, 314.778076171875, 102.99479675292969, -351.854736328125, -39.61981964111328, 153.30764770507812, 14.636810302734375, 125.92549133300781, 342.06634521484375, 103.99932861328125, 59.02601623535156, -135.29672241210938, 18.970474243164062, 383.9123840332031, 181.88436889648438, 290.3797607421875, 197.86257934570312, 78.77214050292969, -3.8595733642578125, -23.465179443359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000208.npy"}
|
|
{"epoch": 0.3054331864904552, "step": 209, "batch_size": 64, "mean": 93.52122497558594, "std": 151.7918243408203, "min": -326.0837707519531, "p10": -79.75263519287108, "median": 88.13810348510742, "p90": 313.1155426025391, "max": 440.80902099609375, "pos_frac": 0.71875, "sample": [339.05224609375, 78.77662658691406, 175.3345947265625, -45.43822479248047, 365.93414306640625, 408.91009521484375, 318.0581359863281, 116.74105834960938, 125.54444122314453, 99.81448364257812, -61.3139762878418, 95.3148422241211, 162.50341796875, 95.91057586669922, 162.0978546142578, 130.73648071289062, 162.68624877929688, -167.8165740966797, -81.8470458984375, 59.264564514160156, 250.65859985351562, 226.58935546875, -4.158517837524414, -94.609130859375, 44.818603515625, -46.12831115722656, -23.578027725219727, 9.186485290527344, 95.13751220703125, 156.23898315429688, 115.93634033203125, 81.1386947631836, -128.50408935546875, -20.780567169189453, 64.01457977294922, 71.20123291015625, -74.86567687988281, -37.747859954833984, 367.7143249511719, 301.58282470703125, 51.402099609375, 440.80902099609375, 9.841255187988281, -21.747825622558594, -27.600860595703125, 34.71430206298828, 41.52264404296875, 74.9681396484375, 247.0990447998047, 131.74574279785156, 135.21751403808594, -36.98644256591797, -137.93951416015625, -326.0837707519531, 179.48257446289062, 96.11257934570312, 36.43581008911133, 50.909446716308594, 136.53903198242188, -149.0877685546875, 344.26776123046875, 296.1293640136719, 211.76919555664062, 271.7290344238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000209.npy"}
|
|
{"epoch": 0.3069016152716593, "step": 210, "batch_size": 64, "mean": 107.38471984863281, "std": 147.74757385253906, "min": -244.6871337890625, "p10": -32.44748001098632, "median": 81.94678497314453, "p90": 310.7277893066407, "max": 503.7625732421875, "pos_frac": 0.75, "sample": [-7.098907470703125, 0.7410373687744141, 131.4471435546875, 68.07757568359375, -38.79005432128906, -85.98199462890625, 177.909423828125, -13.220983505249023, 134.82496643066406, -35.272117614746094, 194.49557495117188, 20.18988800048828, 34.37062454223633, 123.8178482055664, 272.72552490234375, 143.36563110351562, -6.192535400390625, 71.90609741210938, 204.9383544921875, 18.306827545166016, 282.31646728515625, 94.78675079345703, 30.245256423950195, 232.3291015625, 255.76193237304688, 4.625551223754883, 13.135833740234375, 317.31048583984375, 187.73056030273438, 370.66363525390625, 125.5974349975586, 246.1757049560547, 121.1238784790039, 295.3681640625, -119.82049560546875, -25.856658935546875, -89.81619262695312, -18.819320678710938, 31.32737922668457, 6.228572845458984, 139.50634765625, 166.78482055664062, 351.92083740234375, 100.88418579101562, -4.088598251342773, -14.207550048828125, 433.1382751464844, -13.109024047851562, 124.26219177246094, 156.62930297851562, 278.97454833984375, 503.7625732421875, 293.46246337890625, 48.221702575683594, -58.339866638183594, 91.98747253417969, -22.503990173339844, 4.935649871826172, 10.820619583129883, 348.9691162109375, 3.7842330932617188, 17.551963806152344, -244.6871337890625, 382.9874267578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000210.npy"}
|
|
{"epoch": 0.30837004405286345, "step": 211, "batch_size": 64, "mean": 92.20182800292969, "std": 163.1658477783203, "min": -214.251708984375, "p10": -61.749727630615226, "median": 59.897769927978516, "p90": 266.8033569335938, "max": 566.5140380859375, "pos_frac": 0.71875, "sample": [128.15647888183594, -52.693450927734375, 236.29156494140625, 103.62032318115234, 55.76565170288086, 161.11680603027344, -46.45977783203125, 112.88797760009766, 527.7872314453125, -214.251708984375, -41.80535125732422, 57.33330535888672, 26.573848724365234, 15.50250244140625, 65.08087158203125, 230.2621307373047, 147.0859375, 207.760498046875, 164.17422485351562, -10.607406616210938, -0.33484649658203125, -36.196807861328125, 203.18576049804688, 109.5970687866211, -65.63098907470703, 7.7254791259765625, -44.889434814453125, 522.816650390625, 41.68794631958008, 566.5140380859375, 459.88031005859375, -50.94347381591797, -134.6807861328125, 142.27586364746094, 222.54373168945312, 316.253173828125, -152.93292236328125, 143.24374389648438, 51.27374267578125, -7.326873779296875, 118.97679901123047, 397.42041015625, -27.10987091064453, 74.06084442138672, 9.806425094604492, 48.98905944824219, 71.43679809570312, -94.32582092285156, 15.82187271118164, 216.57485961914062, -157.6688690185547, 62.46223449707031, 195.18446350097656, 42.599830627441406, 151.1214599609375, 273.326416015625, 87.6716537475586, 251.5828857421875, 20.330413818359375, 128.00270080566406, 35.098392486572266, 15.847442626953125, -15.938949584960938, -187.9974365234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000211.npy"}
|
|
{"epoch": 0.30983847283406757, "step": 212, "batch_size": 64, "mean": 131.85598754882812, "std": 170.150634765625, "min": -241.6673126220703, "p10": -68.3579620361328, "median": 139.86361694335938, "p90": 314.61345825195315, "max": 593.81640625, "pos_frac": 0.75, "sample": [-241.6673126220703, -92.35271453857422, 314.7279357910156, 261.0855407714844, 57.11035919189453, 242.77487182617188, 110.12103271484375, -73.73216247558594, -30.853620529174805, -1.6504135131835938, 246.13795471191406, 173.60357666015625, -196.1531524658203, -40.246551513671875, -27.503742218017578, 8.689964294433594, 315.16253662109375, 210.218505859375, 424.85687255859375, 234.6907196044922, 105.35581970214844, -109.86200714111328, 29.553081512451172, 100.38743591308594, 149.45054626464844, 171.77682495117188, 225.93234252929688, -80.41940307617188, 298.0386657714844, -9.697893142700195, 129.63272094726562, 178.0751190185547, 593.81640625, 176.5470733642578, 54.65612030029297, -216.20066833496094, 303.2891845703125, -55.81816101074219, 139.07073974609375, 574.6016235351562, 289.2341613769531, 112.09963989257812, 49.49969482421875, 232.77239990234375, 14.850059509277344, 140.656494140625, -32.10812759399414, 54.466121673583984, 314.3463439941406, 47.73955154418945, 141.73324584960938, 365.2469482421875, 397.0732421875, 262.94537353515625, 307.5562744140625, -6.470355987548828, 235.64707946777344, 62.876094818115234, 151.52023315429688, 143.09146118164062, 68.06697845458984, 299.2960205078125, -38.181243896484375, 171.6195068359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000212.npy"}
|
|
{"epoch": 0.31130690161527164, "step": 213, "batch_size": 64, "mean": 125.99160766601562, "std": 153.23873901367188, "min": -200.09783935546875, "p10": -63.980883789062496, "median": 110.3159408569336, "p90": 338.30189208984376, "max": 446.0223693847656, "pos_frac": 0.796875, "sample": [287.1466064453125, -86.83132934570312, 41.53002166748047, -108.2071304321289, 226.6097869873047, 14.892684936523438, 51.178260803222656, 98.11820983886719, -200.09783935546875, -80.84298706054688, 10.24917221069336, 310.34033203125, 356.3276062011719, -31.21373176574707, 98.37950897216797, 177.33316040039062, 9.35325813293457, 62.17814636230469, 157.96791076660156, 324.43792724609375, 419.875244140625, 317.34149169921875, 50.19617462158203, 420.3418273925781, 171.57371520996094, 274.43951416015625, 365.8465881347656, -16.440296173095703, -3.7909469604492188, -12.162738800048828, 186.41497802734375, 124.28882598876953, 197.2703857421875, 122.25237274169922, -59.332916259765625, 18.5120849609375, 173.54568481445312, 340.0644836425781, 282.8717041015625, 151.93768310546875, -8.603893280029297, 195.4077911376953, 334.1891784667969, -65.97286987304688, 200.12025451660156, 60.586181640625, 166.85865783691406, 446.0223693847656, -136.96417236328125, 191.20074462890625, 249.56332397460938, 298.92388916015625, 370.260009765625, 52.0104866027832, 8.735977172851562, 3.148303985595703, -120.63542938232422, 80.6380615234375, 96.34577178955078, 67.00055694580078, 13.857536315917969, 13.304855346679688, 165.64219665527344, 137.92752075195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000213.npy"}
|
|
{"epoch": 0.31277533039647576, "step": 214, "batch_size": 64, "mean": 113.03147888183594, "std": 147.8744659423828, "min": -175.60574340820312, "p10": -54.35195693969726, "median": 84.23110580444336, "p90": 296.1325164794922, "max": 525.519775390625, "pos_frac": 0.796875, "sample": [62.05211639404297, 413.0527038574219, 15.042583465576172, 191.35777282714844, 4.095766067504883, -9.187765121459961, -84.01930236816406, -175.60574340820312, 268.6060791015625, -81.06674194335938, 170.82217407226562, 40.81611633300781, 109.3017807006836, -0.6740150451660156, 33.35807800292969, 107.8150634765625, 218.6571044921875, -18.915361404418945, 46.67387390136719, 184.86134338378906, 71.59615325927734, 61.335838317871094, 105.6054916381836, 267.5733642578125, 246.372314453125, 73.39183807373047, 11.880622863769531, 0.10316658020019531, 124.62579345703125, -46.75257110595703, 95.07037353515625, 65.04422760009766, 15.982200622558594, 340.68603515625, -82.67926788330078, 525.519775390625, 351.326416015625, 5.4781341552734375, 27.038543701171875, 401.6438293457031, 214.24513244628906, 204.51217651367188, 266.51092529296875, 298.623291015625, -142.76046752929688, 496.56610107421875, 22.995725631713867, 209.58004760742188, 216.8170166015625, 100.44823455810547, -49.19316864013672, 164.26881408691406, -45.960205078125, 117.65415954589844, -58.0999755859375, 240.16488647460938, 68.1973876953125, 290.3207092285156, 65.6295166015625, 145.8204345703125, 122.85043334960938, 129.79600524902344, -56.5628662109375, 53.7040901184082], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000214.npy"}
|
|
{"epoch": 0.3142437591776799, "step": 215, "batch_size": 64, "mean": 118.9841079711914, "std": 136.97496032714844, "min": -148.7521514892578, "p10": -62.17883911132812, "median": 104.1790657043457, "p90": 284.74669189453124, "max": 518.4662475585938, "pos_frac": 0.828125, "sample": [65.93231201171875, 337.9312438964844, 126.52665710449219, 271.9446105957031, -53.30224609375, -94.1912841796875, 51.433860778808594, 122.72553253173828, 140.78807067871094, 101.8367919921875, 126.32444763183594, 46.154876708984375, 5.40629768371582, -33.625343322753906, 413.67034912109375, 257.3918151855469, 244.95233154296875, 283.27508544921875, 518.4662475585938, 162.86746215820312, 101.35153198242188, 392.70599365234375, 102.4393081665039, -78.45564270019531, 73.05867004394531, 198.08148193359375, -70.35646057128906, 69.84879302978516, 197.28546142578125, 259.9056396484375, 32.18054962158203, 67.9691162109375, 58.181549072265625, -9.958150863647461, 158.61978149414062, 90.53658294677734, -122.2544937133789, 147.1148223876953, 194.88735961914062, 264.64697265625, 349.99371337890625, 109.73209381103516, 196.4800262451172, -5.104034423828125, 140.57830810546875, 50.51325988769531, 61.77080535888672, -65.98309326171875, 50.26120376586914, 203.81578063964844, -148.7521514892578, 157.04830932617188, 384.14434814453125, 33.41514205932617, 63.06596374511719, 105.9188232421875, 285.37738037109375, 6.6734466552734375, 207.16989135742188, 36.585655212402344, 107.77599334716797, 38.781890869140625, -89.08529663085938, 110.50728607177734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000215.npy"}
|
|
{"epoch": 0.315712187958884, "step": 216, "batch_size": 64, "mean": 116.20650482177734, "std": 168.2432098388672, "min": -297.67608642578125, "p10": -71.09631118774413, "median": 101.3721694946289, "p90": 364.2438110351563, "max": 566.099853515625, "pos_frac": 0.734375, "sample": [181.88998413085938, 397.01385498046875, -70.92273712158203, 144.56195068359375, 200.23626708984375, 40.94036865234375, -181.7615966796875, 156.79122924804688, 214.7274932861328, 6.236030578613281, 566.099853515625, 97.02784729003906, 105.71649169921875, -71.17070007324219, -14.60062026977539, -21.12237548828125, -89.8766098022461, 205.09120178222656, 36.93061447143555, -174.29495239257812, -6.1335906982421875, 302.1771240234375, -14.377372741699219, -8.837615966796875, 111.32168579101562, 151.2089385986328, 48.066619873046875, 183.51144409179688, 109.80255126953125, -2.726045608520508, 111.5474853515625, 208.02725219726562, 23.68341827392578, -55.00988006591797, 373.885986328125, 369.3604736328125, -72.39292907714844, 296.93963623046875, 42.46311950683594, 85.50865936279297, 64.99840545654297, 289.2521667480469, 4.198785781860352, -297.67608642578125, 56.540382385253906, 194.90283203125, 227.4705810546875, 400.39105224609375, 46.9688720703125, -84.07233428955078, 274.1953430175781, 42.22288513183594, 114.61946868896484, 175.10205078125, -59.847084045410156, 338.92578125, 207.38491821289062, 334.9000244140625, 402.085693359375, 383.91790771484375, 20.207229614257812, -65.53533172607422, 26.217330932617188, 352.304931640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000216.npy"}
|
|
{"epoch": 0.31718061674008813, "step": 217, "batch_size": 64, "mean": 137.11187744140625, "std": 155.8596954345703, "min": -177.6549835205078, "p10": -17.782174682617185, "median": 94.10555267333984, "p90": 371.1603149414063, "max": 528.1328125, "pos_frac": 0.84375, "sample": [137.6058349609375, 135.75563049316406, 94.78417205810547, 127.87317657470703, 63.01770782470703, 52.534095764160156, 66.383056640625, 237.18299865722656, 80.3468246459961, 92.03582763671875, 46.055450439453125, 375.046142578125, 144.56417846679688, 362.0933837890625, -116.3198013305664, -0.8191299438476562, 34.33708190917969, 8.988311767578125, 195.99639892578125, 528.1328125, -7.723812103271484, 63.19690704345703, 263.1900939941406, 434.3919372558594, 93.42693328857422, 210.12057495117188, -33.096168518066406, 67.45468139648438, -177.6549835205078, 104.3385009765625, 494.2815856933594, 194.4508056640625, 7.377042770385742, 68.46739196777344, 268.163330078125, 4.011810302734375, 227.60296630859375, 279.8141784667969, -17.339942932128906, 482.30291748046875, 428.11236572265625, -80.94893646240234, 361.1319580078125, 277.14434814453125, 138.502685546875, 462.1713562011719, 8.235885620117188, 95.9039306640625, 72.80756378173828, 65.25786590576172, 63.68119812011719, 155.61679077148438, 260.2296447753906, 3.942476272583008, 161.96990966796875, 76.16972351074219, -44.321372985839844, -17.971702575683594, 27.0955810546875, -32.65956115722656, 127.95765686035156, 210.5738983154297, 243.46266174316406, 18.723108291625977], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000217.npy"}
|
|
{"epoch": 0.3186490455212922, "step": 218, "batch_size": 64, "mean": 106.32524108886719, "std": 161.58865356445312, "min": -256.5267333984375, "p10": -76.16501464843749, "median": 83.36017990112305, "p90": 326.4840728759766, "max": 571.85791015625, "pos_frac": 0.734375, "sample": [93.24697875976562, 48.659400939941406, 173.6424560546875, 246.32786560058594, 144.80133056640625, 26.191577911376953, 330.3853454589844, 72.02809143066406, 6.801176071166992, 65.0412826538086, 155.5831298828125, 51.223602294921875, -18.44422149658203, 129.9713897705078, 446.72412109375, 169.3790740966797, 115.5692138671875, -29.161041259765625, 138.83792114257812, 193.44622802734375, 40.01202392578125, 63.237464904785156, -0.3558235168457031, 226.94723510742188, -59.71513366699219, -256.5267333984375, 317.381103515625, 96.6849365234375, 189.58084106445312, 201.5040283203125, 38.296630859375, 421.41204833984375, 423.41400146484375, 253.98529052734375, 199.8343505859375, 69.97196960449219, -24.06337547302246, -25.523569107055664, 88.70513153076172, -22.402103424072266, -38.257301330566406, -93.97601318359375, 144.30923461914062, -99.94415283203125, -74.31774139404297, -50.25971221923828, 571.85791015625, 333.71234130859375, 204.44313049316406, -76.95670318603516, -146.30801391601562, 234.31874084472656, 443.6822509765625, 11.13200569152832, 73.1258544921875, 78.01522827148438, 152.8314971923828, 108.18856048583984, 35.659889221191406, -153.31829833984375, 77.3488540649414, -153.8125762939453, 312.5782470703125, 108.12728881835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000218.npy"}
|
|
{"epoch": 0.3201174743024963, "step": 219, "batch_size": 64, "mean": 91.84480285644531, "std": 173.06005859375, "min": -410.1614685058594, "p10": -110.70987777709959, "median": 68.53975296020508, "p90": 346.35815124511726, "max": 480.4259033203125, "pos_frac": 0.75, "sample": [457.42327880859375, 169.88641357421875, 11.668142318725586, 326.855712890625, 3.515512466430664, 427.80670166015625, -410.1614685058594, 19.660789489746094, 37.8853759765625, 307.7984924316406, -11.175956726074219, 97.9596176147461, 207.453369140625, 448.88018798828125, 480.4259033203125, 169.4783935546875, 64.96900177001953, 258.46844482421875, 105.36776733398438, 109.14691162109375, 258.5706787109375, 38.12898254394531, -120.50682830810547, 4.614044189453125, -45.496849060058594, -56.73778533935547, 354.7163391113281, -4.531379699707031, 360.281982421875, 374.78192138671875, 41.61742401123047, 279.0205078125, -2.0500030517578125, 45.89070129394531, 16.487319946289062, 186.42794799804688, 109.66149139404297, 17.07184600830078, 104.19744873046875, 18.20819091796875, 297.4757385253906, 132.83534240722656, 97.8828125, -175.23641967773438, 59.14976501464844, 159.9532928466797, -176.83935546875, 6.90278434753418, 9.471607208251953, 233.4733428955078, 27.84124755859375, -126.8418960571289, -5.618370056152344, -59.21165084838867, -240.8579559326172, 79.5975570678711, 149.02886962890625, 87.47311401367188, -87.85032653808594, -30.049209594726562, 140.2393798828125, 72.11050415039062, 84.83969116210938, -121.36864471435547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000219.npy"}
|
|
{"epoch": 0.32158590308370044, "step": 220, "batch_size": 64, "mean": 77.24984741210938, "std": 126.11031341552734, "min": -254.4945068359375, "p10": -65.81688842773437, "median": 78.26408386230469, "p90": 199.88736114501955, "max": 422.3584289550781, "pos_frac": 0.75, "sample": [201.01553344726562, 49.611366271972656, 64.78952026367188, -29.75170135498047, 34.891014099121094, 327.65167236328125, 371.3300476074219, 4.873271942138672, 138.5681610107422, 146.1868133544922, 125.47855377197266, 265.91278076171875, 136.87115478515625, 142.986572265625, -225.9703369140625, 103.833984375, 171.22531127929688, 52.415618896484375, 135.38323974609375, 197.2549591064453, 73.88539123535156, 22.910518646240234, 4.1168060302734375, -254.4945068359375, -66.9185562133789, 193.7339324951172, -28.917795181274414, 51.81329345703125, 209.70144653320312, 65.46961975097656, -45.906585693359375, 160.58721923828125, 121.08195495605469, -18.510543823242188, 203.2058868408203, 116.9098129272461, 422.3584289550781, 163.0289764404297, -2.5494461059570312, 107.40166473388672, 17.62874984741211, 53.15781784057617, 4.367334365844727, 92.97431945800781, -96.06295013427734, 150.26126098632812, 61.864288330078125, 134.3290557861328, -139.25059509277344, 82.64277648925781, 72.25144958496094, -14.924949645996094, 143.23760986328125, -60.961151123046875, 49.398563385009766, 153.89671325683594, 180.8106689453125, -63.24633026123047, 176.65640258789062, 173.21484375, -18.115657806396484, 155.76443481445312, -143.87484741210938, -135.49432373046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000220.npy"}
|
|
{"epoch": 0.32305433186490456, "step": 221, "batch_size": 64, "mean": 169.42575073242188, "std": 160.10781860351562, "min": -175.195556640625, "p10": -21.388056182861316, "median": 171.666015625, "p90": 382.7446441650391, "max": 537.0731201171875, "pos_frac": 0.84375, "sample": [75.77404022216797, 112.07882690429688, 221.5959014892578, -7.110179901123047, 96.49252319335938, 537.0731201171875, 22.545692443847656, 391.0269775390625, 217.26290893554688, 274.05926513671875, 284.8736572265625, 23.930164337158203, 288.0059814453125, 248.46212768554688, 162.72183227539062, 276.0635986328125, -10.610137939453125, 457.1679382324219, 2.9527416229248047, 90.21085357666016, 164.373291015625, 294.4010009765625, 364.0237731933594, 475.5550537109375, 479.396728515625, 243.58047485351562, 230.9079132080078, -175.195556640625, 56.75657653808594, 68.8290023803711, 168.71853637695312, 194.40232849121094, -29.882553100585938, 50.43260955810547, 65.46151733398438, 174.61349487304688, 107.00293731689453, -127.37727355957031, 260.44293212890625, 386.105224609375, -26.007164001464844, 283.240966796875, 242.16220092773438, 325.59478759765625, 121.4217529296875, 228.38226318359375, 34.034149169921875, 374.9032897949219, 309.9325866699219, 228.93722534179688, -35.581722259521484, -106.94083404541016, -4.719856262207031, 245.2525634765625, 52.41481018066406, -67.814697265625, 18.578210830688477, 128.54095458984375, 45.54802322387695, 193.26785278320312, 300.44036865234375, 80.27931213378906, 464.999755859375, 189.2547149658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000221.npy"}
|
|
{"epoch": 0.3245227606461087, "step": 222, "batch_size": 64, "mean": 126.48561096191406, "std": 159.88316345214844, "min": -224.42062377929688, "p10": -43.92554397583008, "median": 115.12967300415039, "p90": 356.7533508300782, "max": 539.1654663085938, "pos_frac": 0.796875, "sample": [-44.03253936767578, 442.8094787597656, 0.9259185791015625, -106.64602661132812, -13.794212341308594, -92.26589965820312, -15.108612060546875, 118.1082763671875, 4.535919189453125, 221.8070068359375, 107.51183319091797, 120.76795196533203, 418.93475341796875, 96.03800964355469, 122.87006378173828, 236.12088012695312, -224.42062377929688, 151.147705078125, 258.56890869140625, 176.6490936279297, 152.20187377929688, 104.39491271972656, -205.83404541015625, 324.6680603027344, -121.99872589111328, 278.01837158203125, 485.82293701171875, 94.260498046875, 31.351173400878906, 16.556861877441406, 153.64340209960938, 21.808547973632812, 160.21676635742188, -43.67588806152344, 197.7211151123047, 198.10154724121094, 44.551361083984375, 156.59092712402344, -23.646686553955078, 201.4569091796875, -27.93488311767578, 91.27702331542969, 84.5677261352539, 76.40480041503906, 331.23150634765625, 240.13641357421875, 113.51555633544922, 234.91201782226562, 149.62301635742188, 47.91542053222656, -36.10377502441406, 173.7422332763672, 367.6912841796875, 65.53402709960938, 403.0927429199219, 215.11849975585938, 116.74378967285156, -108.54827880859375, 220.27682495117188, 20.17070770263672, 539.1654663085938, 55.982177734375, 407.64599609375, 106.18144226074219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000222.npy"}
|
|
{"epoch": 0.32599118942731276, "step": 223, "batch_size": 64, "mean": 108.11112213134766, "std": 176.57351684570312, "min": -321.70465087890625, "p10": -107.57542266845702, "median": 109.78140640258789, "p90": 307.8881256103516, "max": 630.3362426757812, "pos_frac": 0.75, "sample": [66.46580505371094, -69.88190460205078, 206.37442016601562, 140.79107666015625, 141.03933715820312, 360.9715270996094, 193.42718505859375, 97.4629135131836, 204.0455322265625, 156.00531005859375, -148.3977813720703, 52.962127685546875, 90.61491394042969, 233.75411987304688, 300.67828369140625, 290.4259948730469, 138.64505004882812, 86.42503356933594, 533.1952514648438, -12.12430191040039, 146.88397216796875, -57.24413299560547, 86.04154968261719, -56.99100875854492, -55.993255615234375, 294.9594421386719, -140.6066436767578, 370.1899719238281, 104.39875030517578, -108.4780044555664, 1.1115264892578125, 20.970291137695312, 264.5784912109375, -321.70465087890625, 115.1640625, 148.54562377929688, -197.80418395996094, 194.84567260742188, 70.06427001953125, 630.3362426757812, 242.1010284423828, -105.46939849853516, 170.45606994628906, 259.4590759277344, -163.2891845703125, -57.04703140258789, 285.0249938964844, 327.37725830078125, 154.02772521972656, -17.982284545898438, 52.383697509765625, 42.07600402832031, 61.93218994140625, 423.89166259765625, 27.989707946777344, 24.442665100097656, -10.559179306030273, 10.895879745483398, 145.67852783203125, 137.61837768554688, 117.40570831298828, 163.91729736328125, -256.3447570800781, 310.9780578613281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000223.npy"}
|
|
{"epoch": 0.3274596182085169, "step": 224, "batch_size": 64, "mean": 123.00054168701172, "std": 184.1900177001953, "min": -331.42901611328125, "p10": -73.65997009277343, "median": 67.24323654174805, "p90": 364.91593933105474, "max": 662.2747192382812, "pos_frac": 0.8125, "sample": [246.63230895996094, 3.62603759765625, 127.01066589355469, 32.290184020996094, -75.87986755371094, 543.3571166992188, 237.7719268798828, 40.46234893798828, 69.25450897216797, -331.42901611328125, 46.19794464111328, 189.40953063964844, 139.3958740234375, 158.32073974609375, 437.92022705078125, 89.78790283203125, 57.438743591308594, 224.4812774658203, 191.25738525390625, 662.2747192382812, 287.1253662109375, 110.03985595703125, 351.2599792480469, 248.98785400390625, -136.6374969482422, -83.5498046875, 264.6682434082031, 62.65128707885742, 228.91943359375, -17.483596801757812, 6.3634796142578125, 34.41900634765625, 31.33085060119629, 640.787841796875, 281.8861389160156, 243.97735595703125, 118.75446319580078, -76.1294937133789, 19.505903244018555, 26.724102020263672, 248.5235595703125, 13.14373779296875, 47.26435852050781, -21.866798400878906, 107.45405578613281, -76.57671356201172, -68.48020935058594, 3.3914337158203125, -107.37217712402344, 9.84035873413086, 174.54522705078125, 133.3582763671875, 370.76849365234375, 10.638643264770508, 7.840370178222656, 65.23196411132812, 5.4806060791015625, 310.7666320800781, 37.468162536621094, -64.32183837890625, 87.69429016113281, 435.40643310546875, 427.9051818847656, -19.250797271728516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000224.npy"}
|
|
{"epoch": 0.328928046989721, "step": 225, "batch_size": 64, "mean": 144.70822143554688, "std": 182.31053161621094, "min": -242.634033203125, "p10": -90.03026199340819, "median": 146.2316131591797, "p90": 400.06481018066415, "max": 709.7904052734375, "pos_frac": 0.71875, "sample": [233.36151123046875, 163.72689819335938, 417.8214111328125, 354.59100341796875, 33.87830352783203, 110.5674819946289, 38.496002197265625, -98.44085693359375, -144.5419921875, 437.47314453125, 364.13958740234375, 282.30694580078125, -144.0321502685547, -70.4055404663086, 280.82208251953125, -36.800594329833984, -109.8977279663086, 17.47047996520996, -19.31142807006836, 60.69007873535156, 111.46615600585938, 120.03738403320312, 226.98297119140625, 175.72048950195312, -9.98591423034668, 148.25131225585938, 21.973983764648438, 169.69082641601562, -10.35384750366211, 247.18408203125, 312.13140869140625, 170.2938690185547, 224.74073791503906, 102.91243743896484, -33.70277786254883, 462.16754150390625, 444.75970458984375, -6.965522766113281, 258.2379455566406, 408.3313903808594, 99.07003784179688, 337.61669921875, 187.720947265625, -42.31603240966797, 161.81199645996094, 144.2119140625, 709.7904052734375, 284.7452087402344, 170.98501586914062, 77.19609069824219, 411.5115966796875, -2.408660888671875, 220.75106811523438, -103.13470458984375, -101.42073059082031, 380.776123046875, 295.99652099609375, 177.50497436523438, 177.96734619140625, -242.634033203125, 120.1275405883789, 141.46279907226562, -54.21361541748047, -7.580780029296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000225.npy"}
|
|
{"epoch": 0.3303964757709251, "step": 226, "batch_size": 64, "mean": 115.01837921142578, "std": 170.88479614257812, "min": -220.91390991210938, "p10": -39.51744155883788, "median": 77.23161315917969, "p90": 331.7192749023438, "max": 592.187744140625, "pos_frac": 0.75, "sample": [592.187744140625, -154.896484375, -44.515621185302734, 18.8863525390625, -12.015701293945312, 167.3683624267578, -9.431304931640625, 58.942996978759766, 176.036865234375, -27.855022430419922, 122.9085922241211, 13.689849853515625, -18.143508911132812, 32.393768310546875, -182.8189697265625, 93.06929016113281, -104.84654235839844, 317.697998046875, 310.4482421875, 38.870872497558594, 337.7283935546875, 353.6121520996094, 74.91948699951172, 128.65902709960938, 189.6487274169922, 295.14971923828125, 24.097007751464844, -24.55877685546875, 131.464111328125, 431.8475646972656, 293.567138671875, -71.9444808959961, 182.42828369140625, 79.54373931884766, 46.15312194824219, 547.5573120117188, 236.17721557617188, 519.2514038085938, 298.0694274902344, -7.772407531738281, -4.387298583984375, 141.71109008789062, -220.91390991210938, 157.366943359375, 45.82228469848633, 112.78779602050781, -0.7314071655273438, 2.4044036865234375, 19.59417152404785, 84.95661163330078, -10.427886962890625, 259.25482177734375, 110.0560302734375, 200.78192138671875, 39.786582946777344, 121.15336608886719, -101.75079345703125, 6.340044021606445, 489.5035400390625, 60.58319854736328, 35.677162170410156, 141.5442352294922, 9.634124755859375, 206.85354614257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000226.npy"}
|
|
{"epoch": 0.33186490455212925, "step": 227, "batch_size": 64, "mean": 144.60397338867188, "std": 174.37486267089844, "min": -362.8921203613281, "p10": -19.355463790893552, "median": 114.8946533203125, "p90": 363.80503845214844, "max": 643.6366577148438, "pos_frac": 0.859375, "sample": [311.219970703125, 101.30561828613281, 145.37832641601562, 381.39166259765625, 229.49703979492188, 39.34007263183594, 54.99861145019531, 107.88182830810547, 309.99395751953125, 10.31866455078125, 104.5632553100586, 114.31500244140625, 178.23599243164062, 24.080312728881836, -19.888736724853516, 26.66461181640625, 166.53399658203125, 89.64134216308594, 198.2292938232422, 643.6366577148438, 171.8036346435547, 86.80206298828125, 325.0457458496094, 222.88111877441406, 41.610923767089844, 289.91375732421875, 169.18011474609375, 247.8548583984375, 111.2874755859375, 95.360107421875, 175.38446044921875, 56.39329528808594, 333.66485595703125, 5.740684509277344, -83.562255859375, -99.39384460449219, 191.892333984375, 218.76724243164062, 503.09210205078125, 115.47430419921875, 188.23980712890625, 477.8398742675781, -108.52171325683594, 582.03076171875, 5.151044845581055, 20.37006378173828, 189.96084594726562, 34.964210510253906, 221.81753540039062, 36.17540740966797, 131.6300506591797, 363.3486328125, 117.7650146484375, -18.111160278320312, 6.140459060668945, -51.427223205566406, 364.0006408691406, -362.8921203613281, -7.849615097045898, 85.75389099121094, -58.203819274902344, 22.179086685180664, 134.7720184326172, 482.9903259277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000227.npy"}
|
|
{"epoch": 0.3333333333333333, "step": 228, "batch_size": 64, "mean": 102.26102447509766, "std": 165.03652954101562, "min": -272.5915832519531, "p10": -102.06206588745115, "median": 103.093017578125, "p90": 313.5241882324219, "max": 492.72515869140625, "pos_frac": 0.734375, "sample": [-179.33950805664062, -233.90972900390625, -272.5915832519531, 211.96841430664062, 137.78573608398438, 323.51483154296875, -190.69952392578125, 166.7933349609375, 197.52871704101562, -155.38380432128906, 25.9862060546875, 293.2867736816406, 492.72515869140625, 321.61260986328125, 450.62799072265625, 315.88714599609375, 355.1817626953125, 174.2430877685547, 117.30018615722656, -33.692657470703125, -40.90081787109375, 296.65728759765625, 108.63336944580078, 308.0106201171875, 178.05474853515625, 156.2818145751953, 124.77227783203125, 17.187602996826172, -82.1101303100586, 115.39561462402344, 33.31709289550781, 202.9947052001953, 5.450340270996094, 204.59942626953125, 221.45126342773438, 80.58912658691406, 119.26374053955078, 77.09909057617188, 35.32630920410156, 29.5390625, 89.24883270263672, 241.845947265625, -87.46054077148438, -13.883895874023438, 95.04508972167969, 97.55266571044922, 453.6795959472656, 19.317703247070312, 82.51713562011719, 56.616355895996094, 265.21697998046875, -0.023151397705078125, -24.86615753173828, 191.28848266601562, 234.76133728027344, 164.31301879882812, 203.16517639160156, 110.2424545288086, -108.31986236572266, -25.139739990234375, -19.456382751464844, 31.003158569335938, -170.99249267578125, -51.40376281738281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000228.npy"}
|
|
{"epoch": 0.33480176211453744, "step": 229, "batch_size": 64, "mean": 109.92990112304688, "std": 164.21502685546875, "min": -409.8575439453125, "p10": -57.41001205444335, "median": 92.88893127441406, "p90": 342.3139190673828, "max": 567.6272583007812, "pos_frac": 0.75, "sample": [-26.446182250976562, 14.00429916381836, -116.68603515625, 412.17132568359375, 53.80149459838867, -186.71917724609375, 104.61966705322266, 185.02774047851562, 73.9691162109375, 218.8052978515625, 255.24224853515625, 340.08251953125, 96.00398254394531, 351.3622131347656, 89.77388000488281, 97.0441665649414, 252.84158325195312, 106.42646026611328, 35.84097671508789, 351.0220031738281, -90.38148498535156, 63.56494140625, -4.232051849365234, 215.4320831298828, 136.58555603027344, 42.62596893310547, 567.6272583007812, -150.74017333984375, 50.94459533691406, 241.41558837890625, 175.8643798828125, 250.445556640625, 187.30084228515625, 363.4146728515625, 343.2702331542969, 45.396278381347656, 327.7879638671875, 37.42688751220703, 82.41668701171875, -61.928375244140625, 52.556861877441406, 119.47206115722656, 18.250083923339844, 208.73834228515625, -37.423866271972656, 165.1696014404297, 190.0419921875, 225.57154846191406, -33.16943359375, -128.7744140625, 126.4371566772461, 437.59722900390625, 29.89353370666504, 81.91700744628906, 80.61162567138672, 124.89607238769531, -11.764518737792969, 181.3802490234375, -14.998947143554688, 165.5751495361328, -409.8575439453125, -0.7602157592773438, -21.403793334960938, -46.867164611816406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000229.npy"}
|
|
{"epoch": 0.33627019089574156, "step": 230, "batch_size": 64, "mean": 165.46487426757812, "std": 161.83778381347656, "min": -272.88641357421875, "p10": -28.05664730072021, "median": 157.3803253173828, "p90": 404.85068969726564, "max": 495.750244140625, "pos_frac": 0.84375, "sample": [80.48379516601562, 367.0692138671875, -15.607694625854492, 297.7232666015625, 216.58444213867188, 78.55870056152344, -29.275606155395508, 252.7642364501953, 98.11361694335938, 129.92572021484375, 213.3223419189453, 177.93563842773438, 21.49773406982422, 246.48858642578125, 9.977127075195312, 112.35055541992188, -52.16053009033203, 80.51699829101562, 98.52059936523438, 127.49461364746094, 26.9697265625, -97.3505859375, -25.21240997314453, 205.89437866210938, 139.34091186523438, 170.61587524414062, 224.00367736816406, 353.7427978515625, 327.6338806152344, 157.49566650390625, 79.60831451416016, 15.296195983886719, 335.716796875, 453.1526794433594, 170.8915557861328, -105.7057113647461, 156.636474609375, 174.9552001953125, 237.1592559814453, 450.89532470703125, -34.8765983581543, -10.98590087890625, -272.88641357421875, -56.160865783691406, 45.20896911621094, 49.84431457519531, 408.0469970703125, 134.408935546875, 376.6272888183594, 454.3701171875, 168.15621948242188, 107.05096435546875, 115.93324279785156, 23.9307804107666, 414.2451171875, 315.7606201171875, 470.4193420410156, 215.6398468017578, 397.39263916015625, 207.82102966308594, 217.73252868652344, 157.26498413085938, 225.03367614746094, 495.750244140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000230.npy"}
|
|
{"epoch": 0.3377386196769457, "step": 231, "batch_size": 64, "mean": 115.23086547851562, "std": 154.40179443359375, "min": -136.15185546875, "p10": -42.93104019165039, "median": 90.8141975402832, "p90": 342.79926452636727, "max": 652.5733642578125, "pos_frac": 0.796875, "sample": [460.7806396484375, 84.30445861816406, -42.58086395263672, 268.50164794921875, 13.531913757324219, 80.50186920166016, 350.9131164550781, -14.415283203125, 66.90554809570312, 107.95462799072266, 102.07162475585938, 358.0809326171875, -69.3526611328125, 88.16224670410156, 478.8570861816406, 159.8236541748047, 210.7355194091797, 87.908203125, 144.81451416015625, 213.20297241210938, 400.4708557128906, 94.20967102050781, 115.86710357666016, 14.969894409179688, 45.629638671875, -82.31043243408203, 323.866943359375, -54.92485427856445, 203.08749389648438, 652.5733642578125, 462.3946228027344, 238.68218994140625, 134.54747009277344, 36.377105712890625, -136.15185546875, 73.68389892578125, 101.34820556640625, 165.05245971679688, 92.43875122070312, -43.08111572265625, -9.94793701171875, 30.792268753051758, 223.73739624023438, 68.55899047851562, -34.061485290527344, 125.9549789428711, -36.157623291015625, -100.9006576538086, 52.39701461791992, 120.30093383789062, 287.51507568359375, 156.72500610351562, 89.18964385986328, 1.1389427185058594, -27.266437530517578, 118.95439147949219, -98.27354431152344, 100.61271667480469, 27.15576171875, 7.142333984375, 11.691268920898438, 140.2918701171875, 97.87435913085938, 31.917037963867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000231.npy"}
|
|
{"epoch": 0.3392070484581498, "step": 232, "batch_size": 64, "mean": 96.52035522460938, "std": 164.16481018066406, "min": -166.6179962158203, "p10": -75.64318771362305, "median": 57.91950798034668, "p90": 327.5772888183594, "max": 631.88671875, "pos_frac": 0.734375, "sample": [183.71475219726562, 93.8177719116211, 12.864139556884766, 39.25128936767578, 17.241470336914062, -147.2773895263672, 334.5607604980469, 254.96566772460938, -28.144729614257812, 250.43609619140625, 47.46190643310547, 452.82769775390625, 80.99774932861328, -71.7704849243164, -24.861663818359375, 70.6149673461914, -32.31683349609375, 196.0487060546875, -107.1068115234375, 77.6779556274414, 149.9492645263672, 579.6434936523438, 71.63803100585938, 89.23883056640625, 128.30267333984375, -36.0123405456543, 631.88671875, -8.064895629882812, 35.223724365234375, 260.9204406738281, 139.066162109375, 205.05929565429688, -57.4381103515625, 76.69921875, -90.48443603515625, 139.73988342285156, -105.05086517333984, 26.07433319091797, -77.30291748046875, 437.82647705078125, 30.571380615234375, -63.279869079589844, 11.751419067382812, -104.34828186035156, 44.69777297973633, 73.3098373413086, 50.495941162109375, -166.6179962158203, 277.132568359375, 136.0036163330078, 349.98529052734375, 27.202526092529297, 0.22088623046875, 329.57037353515625, 225.2569580078125, -33.46263122558594, 118.44831085205078, 322.9267578125, -24.025251388549805, 50.272560119628906, 59.89023208618164, 37.616641998291016, 69.81683349609375, 55.94878387451172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000232.npy"}
|
|
{"epoch": 0.3406754772393539, "step": 233, "batch_size": 64, "mean": 146.88909912109375, "std": 210.91017150878906, "min": -228.91180419921875, "p10": -69.30752716064453, "median": 92.56890869140625, "p90": 458.0004516601563, "max": 665.9832763671875, "pos_frac": 0.734375, "sample": [14.742769241333008, 9.62942886352539, 484.21160888671875, -33.28443908691406, 193.64102172851562, 196.28640747070312, -164.9335479736328, -115.66504669189453, 0.6105194091796875, -168.54425048828125, 115.39112854003906, 36.810699462890625, 165.04026794433594, 271.8822021484375, 280.7849426269531, 665.9832763671875, 126.08563995361328, 74.40963745117188, 516.6041259765625, 333.4521484375, 46.68782043457031, 150.75404357910156, 79.27790832519531, 558.1654052734375, 17.579063415527344, 303.02490234375, 1.0062103271484375, -71.79141235351562, -192.875, -59.922489166259766, 99.12205505371094, 458.2084045410156, 209.87911987304688, 159.77587890625, 289.06427001953125, 457.5152282714844, 40.873451232910156, 429.78887939453125, -63.51179504394531, 44.65394592285156, 129.11859130859375, 422.23834228515625, -27.230709075927734, 340.2275085449219, 72.03366088867188, 361.329345703125, -51.90940475463867, -228.91180419921875, -136.4980010986328, -27.84295654296875, 45.50562286376953, -24.26581573486328, 335.88385009765625, 85.72611999511719, -30.170440673828125, 222.08421325683594, -22.234691619873047, 473.1295166015625, -41.108524322509766, 611.9312744140625, 361.3551940917969, 86.01576232910156, 282.87774658203125, 201.2041015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000233.npy"}
|
|
{"epoch": 0.342143906020558, "step": 234, "batch_size": 64, "mean": 151.15582275390625, "std": 175.16961669921875, "min": -153.66893005371094, "p10": -54.30388183593748, "median": 136.221923828125, "p90": 370.6693298339844, "max": 664.4743041992188, "pos_frac": 0.78125, "sample": [-79.1217041015625, 280.95819091796875, 295.72711181640625, 221.898193359375, 349.9168395996094, 73.10918426513672, 150.8636474609375, 191.13330078125, 431.6614074707031, -37.35453796386719, 261.5061340332031, 149.4317169189453, -153.66893005371094, -1.1686058044433594, 82.01914978027344, 255.86865234375, 66.45889282226562, -102.5635757446289, 75.22402954101562, 290.78826904296875, 664.4743041992188, 142.25457763671875, -10.273614883422852, -25.17211151123047, 57.229331970214844, -22.192581176757812, 175.160888671875, 353.9813537597656, 5.6920318603515625, 334.7762451171875, -8.364641189575195, 163.09791564941406, 436.35040283203125, 109.66414642333984, 511.44390869140625, 130.18927001953125, 30.375144958496094, 156.46951293945312, 67.88310241699219, 267.92620849609375, 278.6670227050781, 153.34080505371094, 279.2081604003906, 64.21215057373047, 17.080039978027344, 86.9898681640625, -93.998046875, -8.448997497558594, 246.47662353515625, 73.24131774902344, 78.92444610595703, 451.70098876953125, 602.6033935546875, -99.76038360595703, 277.5317687988281, 165.31103515625, -61.56788635253906, 6.742696762084961, 182.28089904785156, 47.93168640136719, 123.89312744140625, -100.33065795898438, 377.8213195800781, 180.46835327148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000234.npy"}
|
|
{"epoch": 0.3436123348017621, "step": 235, "batch_size": 64, "mean": 97.06028747558594, "std": 173.10166931152344, "min": -429.21685791015625, "p10": -69.84274444580078, "median": 109.66214752197266, "p90": 300.5439849853517, "max": 550.0272216796875, "pos_frac": 0.75, "sample": [32.39302062988281, 221.06448364257812, -429.21685791015625, 29.848236083984375, 161.67108154296875, 492.40826416015625, 162.451904296875, 318.4369812011719, -42.052738189697266, 199.1664581298828, -152.85360717773438, -20.7060546875, 155.64321899414062, 273.73944091796875, 46.319305419921875, 31.368911743164062, -61.80589294433594, 32.80583953857422, 108.60639953613281, -302.8197021484375, 0.18134689331054688, 20.710906982421875, -2.9579620361328125, 59.19959259033203, 355.1390075683594, 218.5701141357422, -36.090187072753906, -57.50543212890625, 166.943115234375, 208.94004821777344, 121.67828369140625, 121.84141540527344, 312.0316467285156, -72.73321533203125, 122.17732238769531, 198.63214111328125, 1.1854076385498047, 230.85589599609375, 25.886276245117188, 175.8502197265625, 37.28208541870117, 27.350860595703125, 550.0272216796875, 234.55953979492188, 141.80352783203125, 160.0301055908203, 168.24652099609375, -40.438499450683594, -201.2022705078125, 137.64144897460938, 497.3404541015625, 265.04876708984375, -145.3917236328125, 6.7751922607421875, 110.7178955078125, -13.108953475952148, -75.26947784423828, 186.0337371826172, 88.68173217773438, 16.654220581054688, 182.9866943359375, 144.9435272216797, 367.2394714355469, -63.09831237792969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000235.npy"}
|
|
{"epoch": 0.34508076358296624, "step": 236, "batch_size": 64, "mean": 155.12411499023438, "std": 140.9580078125, "min": -116.2854995727539, "p10": -16.265092849731445, "median": 131.43243408203125, "p90": 348.24792785644536, "max": 463.7901306152344, "pos_frac": 0.875, "sample": [366.3043518066406, 76.88054656982422, 18.316970825195312, 145.3653564453125, 194.58204650878906, 136.36216735839844, 434.4786071777344, 18.431732177734375, 70.2791748046875, 130.39669799804688, 204.3623046875, 338.5730895996094, 98.60628509521484, 200.02691650390625, 310.71563720703125, 337.1770324707031, 79.28657531738281, 255.5199432373047, -15.633583068847656, 352.394287109375, 141.612060546875, -41.16081237792969, 79.98922729492188, 288.3225402832031, 205.52230834960938, 20.340587615966797, 132.46817016601562, 332.8818359375, 255.07443237304688, 51.061378479003906, 463.7901306152344, 112.42720794677734, -34.31718444824219, 250.2266387939453, 5.907390594482422, 249.04937744140625, 137.71405029296875, 448.44366455078125, 95.5072250366211, -16.53573989868164, 121.9354019165039, 114.73145294189453, 30.65570640563965, 50.899574279785156, 7.997703552246094, 195.15760803222656, 108.58171081542969, 108.29566192626953, -70.65924072265625, -116.2854995727539, 161.93179321289062, 455.2524719238281, 353.12017822265625, 115.69131469726562, 216.43826293945312, 113.01187133789062, 314.3359069824219, 96.9717025756836, 4.154014587402344, 197.4384307861328, -64.63041687011719, 336.01873779296875, -25.293476104736328, 171.4425506591797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000236.npy"}
|
|
{"epoch": 0.3465491923641703, "step": 237, "batch_size": 64, "mean": 120.29085540771484, "std": 172.5857391357422, "min": -333.42486572265625, "p10": -66.97137527465819, "median": 114.28156661987305, "p90": 356.88437500000026, "max": 678.9408569335938, "pos_frac": 0.765625, "sample": [182.07969665527344, 194.9283447265625, 209.61578369140625, 154.05197143554688, -0.4024848937988281, 113.43570709228516, 105.5893783569336, 142.57281494140625, 28.101268768310547, 162.55764770507812, 54.91279602050781, 31.978485107421875, 411.12945556640625, 128.59976196289062, 207.29107666015625, -45.02207946777344, -107.32722473144531, 150.02590942382812, -70.9963150024414, 97.1489486694336, 133.69134521484375, -107.67494201660156, 383.36962890625, 678.9408569335938, -57.57984924316406, 398.2938232421875, 294.858154296875, 17.374786376953125, 12.661605834960938, 13.84377670288086, 480.0618896484375, 115.12742614746094, 201.32691955566406, 230.39939880371094, 26.22321319580078, 201.19117736816406, 33.50171661376953, 123.12097930908203, 295.08544921875, -39.830711364746094, 218.83889770507812, 98.09066009521484, 188.97039794921875, 256.1301574707031, 407.2777099609375, 45.629486083984375, -0.010162353515625, 26.220298767089844, -333.42486572265625, 273.92095947265625, -31.68170738220215, 78.29499816894531, -168.8955078125, -42.97200012207031, 187.57440185546875, 278.0091552734375, -74.33135986328125, 462.5086669921875, 7.342536926269531, -125.72897338867188, -46.858917236328125, 119.59606170654297, 70.65679931640625, 219.1993408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000237.npy"}
|
|
{"epoch": 0.34801762114537443, "step": 238, "batch_size": 64, "mean": 118.46604919433594, "std": 172.0164031982422, "min": -166.58364868164062, "p10": -45.28942718505858, "median": 79.00665283203125, "p90": 284.3832458496094, "max": 781.1458129882812, "pos_frac": 0.765625, "sample": [-6.2221832275390625, -28.781524658203125, 4.49053955078125, 135.3350067138672, 20.911930084228516, -5.666740417480469, 130.8827362060547, 273.1304931640625, -104.85067749023438, 428.83782958984375, 74.18378448486328, 159.93252563476562, 69.02415466308594, 64.11944580078125, -22.286874771118164, 198.96385192871094, 288.6617431640625, -60.456695556640625, 46.266929626464844, 46.90510940551758, 2.807058334350586, 638.44677734375, 32.9326171875, 151.8203582763672, -52.36424255371094, 513.299560546875, 104.61592102050781, 14.830177307128906, 134.23313903808594, -98.97431945800781, 136.89199829101562, 119.47442626953125, 157.41134643554688, 117.25252532958984, 358.8226318359375, 21.459850311279297, 57.107357025146484, 126.91291809082031, 781.1458129882812, 139.390380859375, -15.134281158447266, 13.0079345703125, 53.46733474731445, -113.39958953857422, -16.327957153320312, 239.25289916992188, 212.96429443359375, 183.67111206054688, 65.21479797363281, 454.9225769042969, 203.47862243652344, 18.558639526367188, 172.56100463867188, 223.70120239257812, -166.58364868164062, 274.40008544921875, 268.3064270019531, 37.383819580078125, -19.78496551513672, -19.605880737304688, 166.5897216796875, 159.25711059570312, -68.80096435546875, 83.82952117919922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000238.npy"}
|
|
{"epoch": 0.34948604992657856, "step": 239, "batch_size": 64, "mean": 130.94488525390625, "std": 164.232666015625, "min": -269.0634460449219, "p10": -46.35030937194824, "median": 120.25017166137695, "p90": 352.55278930664065, "max": 502.6390075683594, "pos_frac": 0.78125, "sample": [381.73876953125, 171.75413513183594, -154.73318481445312, 355.635498046875, -91.44029235839844, 188.0789794921875, 64.47164916992188, 161.10675048828125, -47.474788665771484, -145.81796264648438, 46.57497024536133, 42.545623779296875, 74.28132629394531, 259.78741455078125, 73.83317565917969, 6.778053283691406, 502.6390075683594, 117.13041687011719, 174.55706787109375, 127.43669891357422, 219.1327362060547, 177.3876190185547, 252.3537139892578, -61.67171859741211, -12.979560852050781, 40.46241760253906, 177.83041381835938, 373.3576354980469, 214.32896423339844, 108.20878601074219, -6.686178207397461, 22.817169189453125, 321.3807373046875, 414.57269287109375, -180.8488311767578, 86.66300201416016, -36.948265075683594, 345.35980224609375, -19.241928100585938, 335.6672058105469, 24.78201675415039, 340.8192138671875, 315.17413330078125, 151.82699584960938, -14.274154663085938, 41.921714782714844, -269.0634460449219, 136.15818786621094, -33.65365219116211, 150.33119201660156, 138.6648406982422, 276.00933837890625, 305.078857421875, 64.58047485351562, 479.87213134765625, 210.6678009033203, 385.9906005859375, 79.80690002441406, 105.50132751464844, 123.36992645263672, 51.030914306640625, -43.726524353027344, 261.40350341796875, 18.170766830444336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000239.npy"}
|
|
{"epoch": 0.3509544787077827, "step": 240, "batch_size": 64, "mean": 109.36317443847656, "std": 183.02085876464844, "min": -239.240234375, "p10": -173.282275390625, "median": 121.3446159362793, "p90": 356.85751342773443, "max": 539.915283203125, "pos_frac": 0.765625, "sample": [137.50247192382812, 143.4476776123047, 152.5899658203125, 98.71183013916016, 138.16114807128906, 347.964599609375, 143.7794952392578, 412.04852294921875, -211.2548828125, -52.32499694824219, 189.76055908203125, 157.0896453857422, -239.240234375, 166.3980712890625, 360.66876220703125, 76.72814178466797, -181.84600830078125, 67.25567626953125, 36.146217346191406, 239.6959228515625, 188.70962524414062, 404.56884765625, 154.8074951171875, 156.42697143554688, 67.00370788574219, 6.375881195068359, -221.72967529296875, 121.2240982055664, 78.23245239257812, 506.873046875, 155.3924102783203, 118.31962585449219, 85.15202331542969, 28.228111267089844, -201.92242431640625, 317.9942321777344, 239.6692352294922, 174.8298797607422, -208.02838134765625, 69.89598846435547, -63.91463088989258, 275.9406433105469, -153.30023193359375, 35.28791427612305, 539.915283203125, -119.21353149414062, 278.4532470703125, -27.5972843170166, 259.3970947265625, 182.1095428466797, 121.46513366699219, 424.1380615234375, -46.174224853515625, 93.12065887451172, -109.09541320800781, 141.30039978027344, 95.73656463623047, -63.63861083984375, 111.87655639648438, 176.07249450683594, 48.87992858886719, 132.78013610839844, 470.1935119628906, -229.79566955566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000240.npy"}
|
|
{"epoch": 0.3524229074889868, "step": 241, "batch_size": 64, "mean": 170.4112548828125, "std": 201.1011505126953, "min": -230.14488220214844, "p10": -34.05300331115723, "median": 139.50295639038086, "p90": 418.9946441650391, "max": 837.898193359375, "pos_frac": 0.828125, "sample": [96.22814178466797, 85.73231506347656, -214.85134887695312, -21.5361270904541, 45.07501220703125, 241.40066528320312, 235.81167602539062, 452.05621337890625, 77.64004516601562, 181.81387329101562, 115.64031982421875, -52.684814453125, -15.830005645751953, 499.77178955078125, 1.5157155990600586, 165.69427490234375, -35.250484466552734, 345.94049072265625, 162.66302490234375, 153.0823516845703, 359.68353271484375, 122.75051879882812, 100.94632720947266, 119.78849792480469, 837.898193359375, 403.5307922363281, 214.59146118164062, 125.9235610961914, 570.3349609375, 41.515380859375, 775.4867553710938, 425.62200927734375, 308.224609375, 66.40941619873047, 90.70945739746094, 156.1827850341797, 229.81192016601562, 95.64087677001953, -31.258880615234375, 163.49635314941406, 113.28961944580078, 49.58794403076172, 102.24652099609375, -11.2208251953125, 218.43753051757812, 63.95703125, 295.3316955566406, -57.31001663208008, 214.34535217285156, 184.65380859375, 69.97074890136719, 34.533775329589844, 252.2874755859375, 483.0439758300781, 285.42669677734375, 171.49331665039062, -190.02825927734375, 279.18157958984375, 264.8397521972656, 259.72393798828125, -230.14488220214844, 380.1669006347656, 90.46884155273438, -115.16342163085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000241.npy"}
|
|
{"epoch": 0.35389133627019087, "step": 242, "batch_size": 64, "mean": 123.42953491210938, "std": 179.42074584960938, "min": -274.43609619140625, "p10": -48.05109252929686, "median": 84.22797775268555, "p90": 316.7211334228516, "max": 704.0394897460938, "pos_frac": 0.8125, "sample": [-30.23047637939453, 1.6023826599121094, 148.60562133789062, 254.06141662597656, 35.633846282958984, -274.43609619140625, 9.127593994140625, -9.902015686035156, 319.68865966796875, 143.385009765625, 29.699010848999023, 264.67633056640625, 651.8072509765625, 10.573209762573242, 13.792854309082031, 73.26961517333984, 113.07073211669922, 120.3460693359375, 49.4783935546875, 8.56574821472168, 219.09500122070312, 65.63922119140625, 245.9420166015625, 227.01773071289062, 259.1728515625, -67.49950408935547, 84.25887298583984, 45.360801696777344, 120.38080596923828, -117.17776489257812, 156.1302490234375, 206.72122192382812, 93.54869842529297, -138.2017822265625, 257.4004821777344, 106.2078628540039, 309.7969055175781, 458.16754150390625, 7.185554504394531, 82.40132141113281, 75.17181396484375, 207.96739196777344, -28.628646850585938, 704.0394897460938, 36.27277374267578, 72.03510284423828, 477.15338134765625, 84.19708251953125, 20.516983032226562, -7.53901481628418, -116.45732879638672, 474.5418701171875, 152.66285705566406, -24.94361114501953, 61.47319030761719, 116.050537109375, 147.80299377441406, -139.20716857910156, 248.16636657714844, 232.44287109375, 1.8144550323486328, 457.7498474121094, 147.53225708007812, -55.688499450683594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000242.npy"}
|
|
{"epoch": 0.355359765051395, "step": 243, "batch_size": 64, "mean": 151.84548950195312, "std": 185.93345642089844, "min": -315.9930725097656, "p10": -64.8328269958496, "median": 135.6540985107422, "p90": 386.5651733398438, "max": 702.1101684570312, "pos_frac": 0.828125, "sample": [390.646484375, 30.356029510498047, 80.74124908447266, 453.7283935546875, 181.744384765625, 64.58187103271484, 131.16551208496094, 168.8026123046875, -98.02363586425781, -105.33147430419922, -70.85836791992188, 93.86856079101562, 227.12619018554688, 22.212594985961914, -140.1192169189453, 12.064727783203125, -143.45071411132812, 339.84393310546875, 148.9220733642578, 171.0294189453125, -315.9930725097656, 48.25147247314453, 174.52566528320312, 20.237770080566406, 26.394296646118164, 331.785888671875, 52.056243896484375, 198.61410522460938, 85.90938568115234, 406.1422119140625, 292.5263977050781, 267.7518310546875, -50.773231506347656, 152.34976196289062, -109.40695190429688, 340.0814208984375, 262.7274475097656, 568.9251708984375, 364.607666015625, 261.2310791015625, 299.3892822265625, 134.53115844726562, 377.0421142578125, 702.1101684570312, 455.1817626953125, 42.7244873046875, 132.15545654296875, 43.7520637512207, 371.3472900390625, -11.796249389648438, 155.03515625, 215.98245239257812, 24.30218505859375, 142.8004150390625, 496.5339050292969, 26.60022735595703, 136.77703857421875, 75.50660705566406, -6.729576110839844, 242.644287109375, -4.7256927490234375, 61.54579162597656, 59.17359924316406, 209.23187255859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000243.npy"}
|
|
{"epoch": 0.3568281938325991, "step": 244, "batch_size": 64, "mean": 159.75762939453125, "std": 171.19309997558594, "min": -165.942626953125, "p10": -33.00220394134521, "median": 137.2669448852539, "p90": 425.1889862060549, "max": 638.5193481445312, "pos_frac": 0.828125, "sample": [64.27830505371094, 32.73474884033203, 114.22038269042969, 180.67330932617188, 28.165035247802734, 54.88274002075195, 461.1360778808594, 139.13995361328125, -57.4036865234375, -39.91180419921875, 171.08750915527344, 246.33612060546875, 14.75885009765625, 315.54351806640625, 40.163116455078125, 135.39393615722656, -1.0893592834472656, 448.8059387207031, 22.36069107055664, 125.5273208618164, -165.942626953125, 118.61225891113281, 311.062744140625, -64.63131713867188, 178.07568359375, 31.501684188842773, 167.38327026367188, 226.9217529296875, 144.6389923095703, 38.71783447265625, 260.94989013671875, 128.58084106445312, 341.9237365722656, 351.8547668457031, 498.69622802734375, 548.0985717773438, 159.4373321533203, 448.0484313964844, 55.89987564086914, 638.5193481445312, 239.33929443359375, -12.953620910644531, 371.85028076171875, 180.00146484375, 146.15235900878906, -88.86498260498047, 130.2507781982422, -26.400056838989258, -35.831695556640625, 258.08441162109375, 22.173416137695312, 0.48630523681640625, 148.2867889404297, 82.52452850341797, -26.318511962890625, 112.56452941894531, 362.5175476074219, 252.10479736328125, -69.62858581542969, 151.93710327148438, 84.3370361328125, 486.37493896484375, 229.27476501464844, 311.0728454589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000244.npy"}
|
|
{"epoch": 0.35829662261380324, "step": 245, "batch_size": 64, "mean": 123.15798950195312, "std": 183.0565643310547, "min": -432.4191589355469, "p10": -28.937536239624016, "median": 92.18741989135742, "p90": 329.82048950195326, "max": 639.23876953125, "pos_frac": 0.78125, "sample": [117.65863800048828, 425.1253662109375, 164.740478515625, 144.8321990966797, 192.98641967773438, 145.0490264892578, 59.08708190917969, -18.20257568359375, 214.31817626953125, 134.83377075195312, 87.44973754882812, 101.68243408203125, 155.72657775878906, 13.423160552978516, 136.09951782226562, 166.67599487304688, 214.1640625, 290.6279296875, 86.81997680664062, 28.804035186767578, 557.7926025390625, 131.8410186767578, -16.208097457885742, 349.97705078125, 65.53155517578125, -5.7851104736328125, 216.7460479736328, -21.167598724365234, -208.2283172607422, -32.26750946044922, 293.76318359375, 150.09561157226562, 639.23876953125, -178.52525329589844, 183.11807250976562, -3.7873306274414062, 3.2672042846679688, 87.58683776855469, 292.0934753417969, 86.83230590820312, -15.96976089477539, 226.65814208984375, 169.79624938964844, -88.66262817382812, 85.12938690185547, -432.4191589355469, 290.7850341796875, 289.6677551269531, 39.06562042236328, 221.04234313964844, 345.27362060546875, 22.555130004882812, 592.8584594726562, -71.87347412109375, -0.08988571166992188, 515.8294677734375, 79.82347106933594, 96.78800201416016, 23.507888793945312, -136.61932373046875, 50.330196380615234, 53.60023498535156, 62.61553192138672, 8.602622985839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000245.npy"}
|
|
{"epoch": 0.35976505139500736, "step": 246, "batch_size": 64, "mean": 126.73939514160156, "std": 187.04763793945312, "min": -256.18084716796875, "p10": -82.37894210815429, "median": 78.96263885498047, "p90": 369.1800720214844, "max": 721.2029418945312, "pos_frac": 0.796875, "sample": [221.66732788085938, 361.795166015625, 147.66574096679688, 87.27603912353516, 389.88751220703125, 218.4527130126953, 721.2029418945312, -182.29617309570312, 28.872615814208984, -256.18084716796875, 50.36675262451172, 74.63347625732422, 83.1014404296875, 0.8324604034423828, 166.3389434814453, 373.57000732421875, 293.5887451171875, 64.9463882446289, 45.99688720703125, 48.38983154296875, 49.7889404296875, 372.34503173828125, 143.5487823486328, -87.3871078491211, 109.06956481933594, -116.6612548828125, 229.04661560058594, -63.21807861328125, 342.5193786621094, -110.53870391845703, 4.8347320556640625, -24.543113708496094, 103.44734191894531, 114.56434631347656, 318.0731201171875, 32.61687469482422, 25.783987045288086, -18.809398651123047, -3.9720458984375, 231.73382568359375, 395.7715148925781, 121.8711929321289, 44.56598663330078, 226.5117645263672, 74.1995849609375, 185.6536865234375, 74.82383728027344, -70.69322204589844, 3.6531639099121094, 227.0698699951172, -98.53762817382812, 91.74685668945312, 319.1868591308594, 202.28163146972656, 25.912139892578125, 40.939422607421875, 8.510679244995117, 573.4638061523438, 315.16656494140625, 632.7523803710938, -45.37458038330078, 261.9660949707031, -96.10771179199219, 3.6369667053222656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000246.npy"}
|
|
{"epoch": 0.36123348017621143, "step": 247, "batch_size": 64, "mean": 177.74020385742188, "std": 202.03518676757812, "min": -304.3945617675781, "p10": -31.061357879638642, "median": 156.26173400878906, "p90": 450.514569091797, "max": 681.200439453125, "pos_frac": 0.875, "sample": [-231.46311950683594, 81.02452087402344, -117.10267639160156, 372.6690673828125, 103.67027282714844, 468.34539794921875, 146.11203002929688, 424.49737548828125, -67.4732437133789, 348.4113464355469, 387.03326416015625, 495.5346374511719, 203.86270141601562, 294.80316162109375, 63.97564697265625, 314.32305908203125, 492.7130126953125, 49.260902404785156, 342.1850891113281, 300.1370849609375, 166.41143798828125, 40.021575927734375, -0.0989990234375, 108.38433074951172, 23.846343994140625, 255.85423278808594, 29.497535705566406, 79.46647644042969, 84.6397476196289, 106.12495422363281, 241.24777221679688, 352.37713623046875, 50.95972442626953, 170.16722106933594, 57.11335754394531, 413.2218017578125, 553.700439453125, 355.733154296875, 111.62126159667969, -153.41085815429688, 3.115337371826172, 210.6221466064453, 4.31585693359375, 183.27134704589844, 44.293601989746094, 9.4229736328125, 170.2990264892578, 416.2763671875, 637.9405517578125, 92.16388702392578, 34.10785675048828, -147.599365234375, 79.69117736816406, 681.200439453125, 232.72198486328125, -44.33094024658203, 177.59536743164062, 300.0087890625, 199.3346405029297, -304.3945617675781, 461.664794921875, 27.866073608398438, 141.4564971923828, 244.93011474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000247.npy"}
|
|
{"epoch": 0.36270190895741555, "step": 248, "batch_size": 64, "mean": 152.0039825439453, "std": 141.36956787109375, "min": -198.24229431152344, "p10": -5.183817672729489, "median": 146.79666137695312, "p90": 351.2314056396485, "max": 433.3505859375, "pos_frac": 0.875, "sample": [336.7567138671875, 389.0773010253906, 105.35183715820312, 183.58108520507812, 182.19869995117188, 0.6936531066894531, 163.21705627441406, 339.7463684082031, 138.6881561279297, -63.91998291015625, 3.401641845703125, 262.6323547363281, 214.3822021484375, 33.0714111328125, 224.1013946533203, 56.29322814941406, 18.829805374145508, 69.70301818847656, 155.16233825683594, -6.218341827392578, 154.74954223632812, 273.8818664550781, 251.884521484375, 424.6744689941406, 9.2840576171875, -27.584945678710938, 114.44062042236328, 365.86907958984375, 125.77162170410156, 194.34719848632812, 165.45057678222656, -198.24229431152344, 138.84378051757812, 294.83966064453125, 155.476806640625, 175.23025512695312, 69.11042785644531, 24.010635375976562, -31.510498046875, 47.44010543823242, 203.63201904296875, 232.72178649902344, 181.56179809570312, 411.77813720703125, 99.94798278808594, 95.57707214355469, 433.3505859375, 271.7124938964844, 26.651153564453125, 426.927734375, 2.0472335815429688, 128.92897033691406, 327.72607421875, 283.550048828125, 232.15988159179688, 356.153564453125, -2.769927978515625, 44.57756042480469, 110.16368103027344, 291.39483642578125, 127.04029083251953, 5.064289093017578, -6.558742523193359, -89.80105590820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000248.npy"}
|
|
{"epoch": 0.3641703377386197, "step": 249, "batch_size": 64, "mean": 79.55818939208984, "std": 177.6361541748047, "min": -316.85430908203125, "p10": -142.95180511474607, "median": 67.83724594116211, "p90": 284.6143310546875, "max": 600.3974609375, "pos_frac": 0.65625, "sample": [-34.92710876464844, 287.74810791015625, -50.01747131347656, -100.11552429199219, 112.2773208618164, -120.39739227294922, -5.2201995849609375, 139.4564971923828, 59.317535400390625, 350.8115539550781, 32.989990234375, 49.4979248046875, 78.33290100097656, 66.3015365600586, 220.58677673339844, -316.85430908203125, -66.36189270019531, 358.7130126953125, 169.90533447265625, -86.10600280761719, 156.05276489257812, 205.4206085205078, -180.06243896484375, 137.7081298828125, -36.68470764160156, 343.0525207519531, -159.71531677246094, 277.30218505859375, 129.89505004882812, 55.77775955200195, -25.54891586303711, -11.73208236694336, 207.5032958984375, 50.27345275878906, 126.78824615478516, -24.430465698242188, 180.90948486328125, -315.59979248046875, 109.786376953125, 261.8013610839844, 151.16635131835938, -22.201065063476562, 600.3974609375, -99.83309936523438, -225.19757080078125, -152.3633575439453, 21.359045028686523, 10.26211166381836, 91.9361572265625, 211.21893310546875, 107.22147369384766, 205.583740234375, -212.410400390625, 226.24612426757812, 270.52288818359375, 265.3255615234375, 67.85554504394531, -120.99151611328125, -0.05513763427734375, 192.1979522705078, 67.8189468383789, 409.1368713378906, 379.1109619140625, 12.979881286621094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000249.npy"}
|
|
{"epoch": 0.3656387665198238, "step": 250, "batch_size": 64, "mean": 150.18911743164062, "std": 155.456298828125, "min": -180.5838623046875, "p10": -35.56586246490478, "median": 141.55087280273438, "p90": 374.32860717773457, "max": 518.1981201171875, "pos_frac": 0.796875, "sample": [267.2301330566406, 37.025909423828125, 38.43315124511719, -16.359264373779297, -43.180694580078125, 194.37574768066406, 97.4283218383789, 431.375732421875, 152.85484313964844, -114.18275451660156, 45.237327575683594, 518.1981201171875, 256.90777587890625, 125.70222473144531, 241.62261962890625, 163.50379943847656, 29.60601043701172, 125.01760864257812, 322.7181396484375, 327.29962158203125, 140.20965576171875, -83.67656707763672, -37.90277862548828, 77.3724365234375, 84.7055892944336, 192.364990234375, 71.01778411865234, -30.11305809020996, 61.354217529296875, -5.6572265625, -18.238046646118164, 37.42478942871094, 174.7821502685547, 288.41070556640625, -54.46490478515625, 483.6640625, 142.89208984375, 158.10714721679688, -94.6751480102539, -5.175712585449219, 211.59776306152344, 394.48388671875, 255.37754821777344, 124.910400390625, 150.6446533203125, 57.528297424316406, 250.71585083007812, 123.3349380493164, 284.9794921875, 125.97013854980469, 69.74281311035156, -0.5352783203125, 198.2034912109375, 217.37686157226562, -180.5838623046875, 203.3070068359375, 396.6014404296875, 210.56918334960938, 170.91653442382812, 414.9328308105469, 319.55419921875, 40.38262176513672, 490.5218811035156, 298.3533935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000250.npy"}
|
|
{"epoch": 0.3671071953010279, "step": 251, "batch_size": 64, "mean": 101.44243621826172, "std": 181.41836547851562, "min": -373.406982421875, "p10": -59.88611373901366, "median": 79.30688858032227, "p90": 327.623959350586, "max": 671.151123046875, "pos_frac": 0.703125, "sample": [382.8231201171875, 12.321380615234375, 317.3006896972656, -19.87828826904297, 209.35211181640625, -266.9097900390625, -373.406982421875, 671.151123046875, -28.091758728027344, 174.0836944580078, 186.42425537109375, -152.28494262695312, 349.41046142578125, 58.096214294433594, 105.263427734375, -38.225982666015625, -250.94415283203125, 474.8141174316406, -7.106664657592773, 172.1980743408203, 16.79379653930664, 248.97763061523438, -14.203086853027344, -44.896873474121094, 5.08795166015625, 136.19712829589844, -125.16868591308594, -66.31007385253906, 84.39877319335938, 223.93344116210938, 65.14117431640625, -118.45879364013672, 287.75274658203125, -40.30706024169922, 51.545005798339844, 198.31716918945312, 69.09436798095703, 116.35274505615234, 154.95126342773438, 141.75177001953125, 10.24481201171875, 219.1123809814453, -23.43064308166504, 290.2877197265625, -4.479156494140625, 348.7198181152344, 205.5858917236328, 181.85923767089844, 74.21500396728516, 137.60089111328125, -39.003944396972656, 21.247329711914062, 563.4791870117188, 70.94255065917969, 332.0482177734375, 157.40603637695312, -31.86141014099121, 190.0782470703125, 103.06787109375, -16.075946807861328, 29.979576110839844, 133.45913696289062, 38.05518341064453, 132.43739318847656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000251.npy"}
|
|
{"epoch": 0.368575624082232, "step": 252, "batch_size": 64, "mean": 142.23912048339844, "std": 146.38037109375, "min": -459.35223388671875, "p10": -13.54698467254638, "median": 148.0460968017578, "p90": 320.26939392089844, "max": 491.2230224609375, "pos_frac": 0.859375, "sample": [294.687255859375, 344.4160461425781, 46.58473587036133, 215.81068420410156, 248.1120147705078, 66.2020263671875, 148.517578125, 320.6166076660156, 71.27975463867188, 128.95590209960938, 219.13856506347656, 9.522125244140625, 170.12600708007812, 55.99275207519531, 111.07954406738281, 157.5541229248047, 56.32846450805664, 138.4088592529297, 165.85684204101562, -46.767845153808594, 247.6035614013672, 175.9010772705078, 46.82114791870117, 408.33966064453125, 252.47634887695312, -131.6114959716797, -20.559682846069336, 107.08026123046875, -16.9407958984375, 147.57461547851562, -25.47031021118164, 186.04400634765625, 30.873430252075195, 149.94204711914062, 64.17837524414062, 114.01531982421875, 134.36862182617188, 419.0415954589844, 357.43096923828125, 306.7021484375, 44.52208709716797, 168.220703125, 161.4969940185547, 40.701019287109375, 221.1298828125, 319.459228515625, 277.87872314453125, 258.2944641113281, 194.5418701171875, 141.4529266357422, -0.0436859130859375, -459.35223388671875, 333.2509765625, 249.2002410888672, 123.770751953125, 105.97010803222656, -5.628091812133789, 162.02301025390625, 491.2230224609375, -57.57070541381836, 63.26981735229492, 32.65099334716797, 171.64193725585938, 188.96661376953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000252.npy"}
|
|
{"epoch": 0.3700440528634361, "step": 253, "batch_size": 64, "mean": 133.81402587890625, "std": 169.00259399414062, "min": -391.6184997558594, "p10": -64.36259231567382, "median": 117.86876678466797, "p90": 366.36648254394544, "max": 556.162841796875, "pos_frac": 0.8125, "sample": [-62.007713317871094, 25.22557830810547, 272.95806884765625, 49.75419616699219, 17.556884765625, 130.63433837890625, 218.481689453125, 51.46080780029297, 178.2941436767578, 88.17509460449219, 100.61013793945312, 556.162841796875, 246.48849487304688, 65.26348114013672, 179.75506591796875, 106.49447631835938, 397.60858154296875, 91.50543212890625, -73.17341613769531, 102.46192932128906, -25.58094596862793, -391.6184997558594, 233.90093994140625, 380.64044189453125, 333.0605773925781, 9.773126602172852, 428.0255126953125, 13.936500549316406, -19.791717529296875, 35.10206985473633, 18.238059997558594, -101.93190002441406, 114.0738525390625, 254.91030883789062, 289.64306640625, 219.8475341796875, 260.795166015625, -124.0226821899414, 253.69810485839844, 112.51029205322266, 242.1747283935547, 327.58148193359375, 151.56829833984375, 271.1899719238281, 390.4998779296875, 96.87586975097656, 121.66368103027344, 395.0836486816406, 124.10668182373047, -219.98358154296875, 312.1522216796875, 173.9764404296875, -88.23794555664062, 59.214393615722656, -65.371826171875, 89.21070098876953, 153.08778381347656, 215.41673278808594, 204.15501403808594, -35.492523193359375, -45.410221099853516, 54.581268310546875, 435.2214050292969, 161.9139404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000253.npy"}
|
|
{"epoch": 0.37151248164464024, "step": 254, "batch_size": 64, "mean": 117.6561279296875, "std": 198.08157348632812, "min": -260.4185485839844, "p10": -109.58972625732422, "median": 91.55871963500977, "p90": 391.4845123291017, "max": 805.4329833984375, "pos_frac": 0.734375, "sample": [120.970703125, -22.570114135742188, 141.57440185546875, 91.78518676757812, 99.37693786621094, 113.08673095703125, 19.786376953125, -88.93453216552734, 135.43966674804688, 32.43647766113281, 132.884033203125, 142.74461364746094, 255.80447387695312, 34.634185791015625, -103.66981506347656, -260.4185485839844, 178.37310791015625, -40.74470138549805, 89.06695556640625, 203.421630859375, 3.8107032775878906, 236.3358612060547, 510.2555236816406, 326.504150390625, 366.9327087402344, -172.53732299804688, 402.0067138671875, 308.7757263183594, -10.660408020019531, 14.034189224243164, 91.3322525024414, 197.250244140625, 66.3857192993164, -192.72647094726562, 21.22787094116211, 181.50918579101562, -72.2375717163086, 421.0008544921875, -112.1268310546875, 60.62826919555664, -29.75469207763672, -203.5771484375, 59.483360290527344, 468.17572021484375, -0.32208824157714844, 210.33584594726562, 5.36534309387207, 335.7857666015625, 61.792518615722656, 176.61062622070312, 805.4329833984375, 147.8241729736328, 84.42178344726562, -18.430641174316406, -161.98812866210938, 554.3615112304688, 136.502685546875, 233.76504516601562, 158.81851196289062, 55.21067810058594, -116.45858764648438, 514.278564453125, 151.3648681640625, -21.755523681640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000254.npy"}
|
|
{"epoch": 0.37298091042584436, "step": 255, "batch_size": 64, "mean": 125.03014373779297, "std": 176.63027954101562, "min": -420.80224609375, "p10": -51.005148315429686, "median": 99.80470275878906, "p90": 331.5616577148438, "max": 581.118896484375, "pos_frac": 0.765625, "sample": [581.118896484375, -92.90435791015625, 19.8765869140625, 336.56787109375, 157.63455200195312, -42.980316162109375, -46.95691680908203, -36.69123840332031, -420.80224609375, 57.903377532958984, 280.48004150390625, 84.88603210449219, -9.14501953125, 22.014995574951172, 217.53048706054688, 195.9097900390625, -147.4005126953125, -210.53228759765625, 170.6682891845703, 276.4939880371094, 248.69296264648438, 35.74867248535156, 95.58252716064453, 20.14975357055664, 94.89163208007812, -52.74010467529297, -95.76195526123047, 372.8148193359375, -5.218669891357422, 173.89389038085938, 200.60606384277344, 224.3284912109375, 62.30678176879883, 319.8804931640625, 361.5740051269531, 319.6819152832031, 256.1285095214844, 346.24456787109375, 315.35040283203125, 541.6181640625, 52.92328643798828, -4.213521957397461, 272.2919921875, 136.9984130859375, 25.801708221435547, 170.8505401611328, 45.07810974121094, 225.89743041992188, -111.04478454589844, 319.00128173828125, 406.077392578125, 50.40177917480469, 124.0669937133789, 47.00969696044922, -39.72423553466797, 125.36514282226562, 25.937782287597656, 104.0268783569336, 28.574485778808594, 144.16685485839844, -42.171653747558594, 72.45889282226562, 318.7265625, 273.9832763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000255.npy"}
|
|
{"epoch": 0.3744493392070485, "step": 256, "batch_size": 64, "mean": 132.7100067138672, "std": 170.6522216796875, "min": -289.64593505859375, "p10": -66.04427719116211, "median": 113.23696899414062, "p90": 343.82243957519535, "max": 578.8585205078125, "pos_frac": 0.765625, "sample": [97.6912841796875, 514.4669799804688, 83.03165435791016, 242.06150817871094, 193.01815795898438, 245.73416137695312, 40.37928771972656, 174.32489013671875, -26.213287353515625, 22.066497802734375, 301.4258728027344, 225.637939453125, 207.72994995117188, 345.6507568359375, 258.72064208984375, -131.05453491210938, 58.408931732177734, 174.3168487548828, -289.64593505859375, 31.14572525024414, 278.58587646484375, 116.80796813964844, -122.00347900390625, 271.13604736328125, 63.1202278137207, 261.6910705566406, -62.21062469482422, -71.06803894042969, -67.68727111816406, 181.78761291503906, 360.3160400390625, -2.841522216796875, 210.44552612304688, 94.21890258789062, 55.26411437988281, 122.08992767333984, 339.5563659667969, -68.39111328125, 47.45384216308594, 42.45002746582031, -195.67088317871094, 36.314361572265625, 20.739906311035156, 3.27099609375, 178.39300537109375, -28.213077545166016, 191.4898681640625, 109.66596984863281, -25.4854736328125, 260.98406982421875, -51.4112548828125, 101.43980407714844, -10.320014953613281, 193.00201416015625, 349.31158447265625, 383.24212646484375, 141.38381958007812, 66.95635986328125, 578.8585205078125, 519.415771484375, 221.3887939453125, 300.1774597167969, -5.495718002319336, 334.3837585449219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000256.npy"}
|
|
{"epoch": 0.37591776798825255, "step": 257, "batch_size": 64, "mean": 141.07919311523438, "std": 180.25241088867188, "min": -256.56219482421875, "p10": -46.79545249938964, "median": 106.59395980834961, "p90": 415.37260437011724, "max": 642.541748046875, "pos_frac": 0.78125, "sample": [446.89239501953125, -39.297542572021484, 8.720413208007812, 110.25357055664062, 363.8237609863281, -14.493545532226562, 203.31817626953125, 422.7191162109375, 237.03558349609375, 146.36300659179688, -2.6893138885498047, -50.00884246826172, 96.65084838867188, 294.87225341796875, 154.48223876953125, 23.42974853515625, 218.53964233398438, 201.4907684326172, 27.00368881225586, 427.3916320800781, 34.89430236816406, 398.2307434082031, 261.4036865234375, -146.12576293945312, 84.92247009277344, 116.78524780273438, -71.06675720214844, 13.274932861328125, -29.70760726928711, 451.0910339355469, 190.26708984375, 226.3928985595703, 254.23233032226562, 642.541748046875, 80.88272094726562, -54.6051025390625, 102.9343490600586, 9.853694915771484, 111.80892181396484, -256.56219482421875, 85.87659454345703, 395.0321350097656, 38.634952545166016, 239.488525390625, 185.58348083496094, 212.30145263671875, 73.22693634033203, 3.5531692504882812, 507.38555908203125, 243.39385986328125, 226.838623046875, 283.17340087890625, -30.11035919189453, 97.03966522216797, 49.922821044921875, 547.2301025390625, -92.85154724121094, -11.911605834960938, 163.44415283203125, -146.4692840576172, 40.34046936035156, 29.283599853515625, -18.17364501953125, 208.89016723632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000257.npy"}
|
|
{"epoch": 0.37738619676945667, "step": 258, "batch_size": 64, "mean": 135.947265625, "std": 191.71681213378906, "min": -493.3393249511719, "p10": -53.54913406372069, "median": 128.76442337036133, "p90": 355.68637390136723, "max": 648.8013305664062, "pos_frac": 0.765625, "sample": [280.5433044433594, 30.614744186401367, -95.652587890625, 249.228271484375, 46.43603515625, 134.96978759765625, 255.28314208984375, 230.15176391601562, 218.72659301757812, -6.23065185546875, -34.49358367919922, -24.93250274658203, -57.723854064941406, 35.9300537109375, 90.53738403320312, -29.587661743164062, -24.94147491455078, 101.56918334960938, 152.97900390625, -10.571968078613281, 47.53326416015625, 255.25375366210938, 512.34228515625, 204.0016326904297, 486.6824645996094, 244.74163818359375, 200.09820556640625, 27.782516479492188, 155.64340209960938, 338.1463623046875, 187.86317443847656, 278.5513916015625, 312.01849365234375, 167.2371368408203, -205.89773559570312, 95.2667236328125, 265.3620910644531, -341.8861083984375, 24.477394104003906, 113.6497573852539, 386.92596435546875, -10.42586898803711, -86.37205505371094, 363.2035217285156, 30.216617584228516, -493.3393249511719, 648.8013305664062, 110.37531280517578, -127.6708984375, 115.56627655029297, 232.41143798828125, -43.80812072753906, 289.811279296875, 334.26214599609375, 378.2645568847656, 36.253318786621094, 281.48175048828125, 402.20037841796875, 152.5262451171875, 255.50921630859375, 91.30403137207031, 122.5590591430664, 242.1392822265625, 76.7265396118164], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000258.npy"}
|
|
{"epoch": 0.3788546255506608, "step": 259, "batch_size": 64, "mean": 115.67851257324219, "std": 178.36978149414062, "min": -246.0644073486328, "p10": -96.26671905517577, "median": 86.70748901367188, "p90": 359.23869323730474, "max": 530.7528076171875, "pos_frac": 0.75, "sample": [210.74766540527344, -16.785619735717773, 154.67120361328125, 197.9558868408203, 17.244823455810547, -98.89163208007812, 313.01947021484375, 392.1914367675781, 420.896728515625, 180.5048065185547, 49.48461151123047, -39.47637176513672, 104.16810607910156, -66.75162506103516, 7.038360595703125, -101.65045166015625, 343.0863342285156, -52.94073486328125, 451.210693359375, 154.31552124023438, 108.09956359863281, 114.38134765625, 83.82904052734375, 54.41611099243164, 291.2735595703125, 18.645706176757812, 53.27772521972656, 47.32569885253906, 530.7528076171875, -190.98915100097656, 61.329071044921875, 29.709985733032227, 486.003173828125, 158.0654754638672, 13.487174987792969, -166.3542938232422, 123.65959930419922, 332.8436279296875, -137.77850341796875, 317.3255615234375, -159.22560119628906, 19.726463317871094, 330.98736572265625, -90.14192199707031, 89.5859375, 48.84779357910156, 366.1611328125, 304.4259338378906, -246.0644073486328, 152.1597442626953, -27.612546920776367, 297.34466552734375, 270.0945739746094, -21.432708740234375, 387.51092529296875, -80.2789535522461, 125.86264038085938, 320.83984375, 132.4329833984375, -67.45304107666016, 60.259334564208984, 35.68314743041992, 139.80776977539062, 64.5613784790039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000259.npy"}
|
|
{"epoch": 0.3803230543318649, "step": 260, "batch_size": 64, "mean": 157.31895446777344, "std": 175.4031982421875, "min": -175.98851013183594, "p10": -17.702655792236325, "median": 126.90476989746094, "p90": 390.98991699218755, "max": 913.0970458984375, "pos_frac": 0.875, "sample": [214.73025512695312, -77.46114349365234, 43.88721466064453, -106.0298080444336, 19.48659324645996, 161.76242065429688, 102.5741195678711, 150.8784637451172, 395.04888916015625, 47.235015869140625, 56.279850006103516, 381.51898193359375, 119.35658264160156, 96.21571350097656, 140.9129180908203, 913.0970458984375, 147.71234130859375, 235.89382934570312, 65.0498046875, 461.449951171875, -175.98851013183594, 159.38021850585938, -25.145292282104492, 161.63604736328125, -109.49713897705078, 4.227897644042969, 146.45787048339844, -18.700088500976562, 234.87844848632812, 112.92694091796875, 107.57040405273438, 348.3974609375, -15.375312805175781, 241.55799865722656, 134.4529571533203, 327.649169921875, 342.3625183105469, 160.8382568359375, 115.41484069824219, 97.30438232421875, 94.79698181152344, 58.52830505371094, 340.04791259765625, 141.0206298828125, 146.018798828125, 49.60996627807617, 29.912673950195312, 92.48834228515625, 180.65106201171875, 190.01670837402344, 208.535400390625, 160.2567138671875, 117.98584747314453, 403.176513671875, 39.401100158691406, -47.79425811767578, 81.26327514648438, 437.25665283203125, 30.64227294921875, 571.7263793945312, 88.98302459716797, 409.5088806152344, 43.4849853515625, 280.9462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000260.npy"}
|
|
{"epoch": 0.38179148311306904, "step": 261, "batch_size": 64, "mean": 147.43002319335938, "std": 203.79600524902344, "min": -327.4521789550781, "p10": -69.56644935607909, "median": 115.84900283813477, "p90": 387.3689727783203, "max": 918.1975708007812, "pos_frac": 0.765625, "sample": [380.9388122558594, 348.3477478027344, 54.771583557128906, -230.12779235839844, 115.78053283691406, 918.1975708007812, 36.79875183105469, 355.72393798828125, -23.15090560913086, 207.72512817382812, -1.5984973907470703, 85.07808685302734, -94.26531982421875, 420.28106689453125, 99.00660705566406, 212.29177856445312, 222.92526245117188, 324.23455810546875, 357.2962341308594, -21.30791473388672, 81.55503845214844, 77.855224609375, 115.91747283935547, 190.8851318359375, 181.6472625732422, 321.44873046875, 504.22698974609375, 20.719818115234375, 288.0435791015625, 217.3784637451172, 57.5784912109375, 27.884620666503906, -0.01654815673828125, 72.20906066894531, 235.18878173828125, 58.27130889892578, 259.44427490234375, 357.340087890625, 272.7066650390625, 147.04220581054688, 227.89920043945312, -63.95321273803711, 474.5182800292969, 416.7254638671875, -5.912729263305664, 390.124755859375, -179.5943603515625, 187.78073120117188, 7.030342102050781, 220.57037353515625, 105.19953155517578, 464.690673828125, -43.56024932861328, -327.4521789550781, -182.8145294189453, 107.47266387939453, -152.46356201171875, -17.432458877563477, 180.34097290039062, 186.49830627441406, 57.18219757080078, 68.22069549560547, 130.149169921875, -71.97212219238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000261.npy"}
|
|
{"epoch": 0.3832599118942731, "step": 262, "batch_size": 64, "mean": 120.58787536621094, "std": 160.24339294433594, "min": -237.8165740966797, "p10": -71.37945175170896, "median": 122.001220703125, "p90": 291.6870025634767, "max": 713.6873779296875, "pos_frac": 0.796875, "sample": [46.21343994140625, 398.1966247558594, 110.48160552978516, 200.99261474609375, -237.8165740966797, 329.0727233886719, 467.66693115234375, 101.79609680175781, 147.9425048828125, 255.06365966796875, -133.6666259765625, -231.23806762695312, 129.76145935058594, 713.6873779296875, 146.28884887695312, -162.45703125, 149.3999786376953, 242.38665771484375, -6.563055038452148, 181.65914916992188, 151.52455139160156, 78.12621307373047, -83.5943603515625, 307.3827209472656, 138.4698944091797, -11.581680297851562, 31.130451202392578, 362.4285888671875, 118.77255249023438, 243.91455078125, 48.484954833984375, 85.00926971435547, 101.33935546875, -42.79049301147461, 86.81227111816406, 59.70557403564453, 180.32037353515625, -15.978179931640625, 162.86105346679688, 167.69122314453125, 125.22988891601562, 130.04551696777344, 1.1747055053710938, 239.10467529296875, 228.46104431152344, 77.55284881591797, 238.8162841796875, 102.99510955810547, -102.80789947509766, -42.87799835205078, 65.12303161621094, 40.22004699707031, 49.46482849121094, 311.3273010253906, -136.54852294921875, -42.089927673339844, 115.7078857421875, 175.9344482421875, 243.10081481933594, 228.33160400390625, 170.21214294433594, 22.553024291992188, 251.3123321533203, 206.38357543945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000262.npy"}
|
|
{"epoch": 0.38472834067547723, "step": 263, "batch_size": 64, "mean": 172.56646728515625, "std": 208.0592498779297, "min": -331.70025634765625, "p10": -70.11700592041015, "median": 136.90958404541016, "p90": 440.61325683593765, "max": 719.70556640625, "pos_frac": 0.8125, "sample": [318.9903869628906, -331.70025634765625, 135.43202209472656, 124.05223083496094, 369.37799072265625, 49.53343963623047, 316.38067626953125, 58.11981201171875, 50.05430603027344, 110.37789154052734, -269.96624755859375, 152.91937255859375, 78.6976547241211, 298.755859375, 293.1785583496094, 175.00416564941406, 19.350141525268555, 389.94952392578125, 100.24588775634766, 159.70713806152344, -23.994577407836914, -91.82180786132812, 34.233314514160156, 300.6237487792969, -29.61636734008789, -74.11691284179688, 330.60919189453125, 138.38714599609375, -30.485252380371094, 83.70614624023438, 39.26420593261719, 19.467771530151367, 348.9866638183594, 60.22386169433594, -60.78388977050781, 495.24383544921875, 272.28863525390625, 101.84260559082031, 373.4999694824219, 522.766845703125, 117.11759185791016, 454.04876708984375, 107.37921142578125, 253.18087768554688, 313.3009338378906, 151.17575073242188, 474.6904296875, -83.55055236816406, 224.71600341796875, 258.6698303222656, 391.36322021484375, 409.26373291015625, 671.61962890625, -153.6625213623047, 2.4603118896484375, 546.716796875, -44.728248596191406, -82.9701919555664, 202.8461456298828, 211.5775146484375, 126.03245544433594, 719.70556640625, 132.30194091796875, 232.21292114257812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000263.npy"}
|
|
{"epoch": 0.38619676945668135, "step": 264, "batch_size": 64, "mean": 145.2810821533203, "std": 207.505126953125, "min": -256.8216247558594, "p10": -77.58253021240233, "median": 120.32405853271484, "p90": 440.3517608642578, "max": 618.5989379882812, "pos_frac": 0.78125, "sample": [212.47320556640625, 296.7690734863281, 48.073795318603516, 614.5197143554688, 80.78538513183594, -252.0602264404297, -166.4322509765625, 407.0829772949219, 71.39105224609375, 470.2510986328125, -69.8214340209961, 35.29723358154297, 120.31343078613281, -153.3834228515625, 152.55731201171875, 161.4384765625, 278.4801330566406, 437.55511474609375, 373.3212585449219, 295.5103759765625, 38.81207275390625, 226.87844848632812, 260.84515380859375, 184.89781188964844, 50.45616149902344, 19.627975463867188, -54.57611846923828, -13.601791381835938, -80.9087142944336, 202.6470489501953, -61.644622802734375, 19.869117736816406, 46.746070861816406, 618.5989379882812, 3.2264404296875, 441.5503234863281, 169.40365600585938, 221.41799926757812, 29.818771362304688, 243.5989227294922, 54.550933837890625, 234.8946990966797, 120.33468627929688, 109.38861083984375, -12.402153015136719, 602.9356079101562, 387.4884033203125, -238.42816162109375, 189.169189453125, -172.45242309570312, 201.3249053955078, 13.86136245727539, 232.89569091796875, 482.70367431640625, -17.324020385742188, 148.98117065429688, 55.90015411376953, 41.008697509765625, -256.8216247558594, 530.73681640625, 310.8588562011719, 245.1224822998047, -4.945743560791016, 56.42132568359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000264.npy"}
|
|
{"epoch": 0.3876651982378855, "step": 265, "batch_size": 64, "mean": 142.77999877929688, "std": 233.81333923339844, "min": -433.81591796875, "p10": -105.16548461914061, "median": 111.35327529907227, "p90": 409.7297973632813, "max": 831.9027099609375, "pos_frac": 0.75, "sample": [68.32732391357422, 399.7268981933594, 320.0338439941406, 221.32176208496094, 47.42272186279297, 153.22702026367188, 359.23028564453125, 22.022193908691406, 672.08740234375, 80.783447265625, 253.20370483398438, -271.3466796875, 26.251285552978516, -30.34231948852539, -127.13641357421875, 24.371326446533203, 371.9036560058594, -96.88069152832031, 97.40705108642578, -43.861175537109375, 230.25479125976562, 831.9027099609375, -433.81591796875, 537.9511108398438, -134.82754516601562, 178.98757934570312, 202.93484497070312, 466.2265319824219, -66.40972900390625, 703.0846557617188, -227.3003387451172, 294.7897033691406, 66.48846435546875, -54.62582015991211, 87.15603637695312, -95.93368530273438, 7.4482421875, -71.58717346191406, 379.067626953125, -108.71611022949219, 414.0167541503906, 103.91448974609375, 74.22610473632812, 375.74188232421875, -36.55919647216797, 202.0403289794922, 20.962478637695312, 158.21612548828125, -30.475540161132812, 270.4077453613281, 89.99262237548828, 144.57061767578125, 156.60977172851562, 364.4299621582031, 123.07133483886719, 139.7793731689453, 222.9051055908203, 118.79206085205078, 54.832942962646484, -275.1116638183594, 439.53729248046875, 92.374755859375, 338.56768798828125, 234.2482452392578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000265.npy"}
|
|
{"epoch": 0.3891336270190896, "step": 266, "batch_size": 64, "mean": 113.11186218261719, "std": 193.6034698486328, "min": -264.2123107910156, "p10": -88.4216468811035, "median": 76.62957382202148, "p90": 400.6787872314453, "max": 673.8787841796875, "pos_frac": 0.71875, "sample": [-38.09442138671875, 524.9966430664062, 51.230369567871094, 123.02564239501953, 230.3359832763672, 118.91868591308594, -54.52943420410156, 274.7876892089844, 22.358795166015625, 426.45123291015625, 124.25614929199219, 277.4827880859375, -74.57896423339844, 256.2646179199219, -76.94804382324219, 48.27519989013672, 251.60159301757812, 78.89199829101562, 183.97804260253906, 74.36714935302734, 141.7378692626953, -263.80499267578125, -93.33890533447266, 382.0263671875, 131.8872528076172, 398.6001281738281, 197.8722686767578, 25.37194061279297, 35.91315460205078, 401.56964111328125, 443.9792175292969, 272.10601806640625, 673.8787841796875, 472.217041015625, 209.1808624267578, 112.966064453125, 44.16898727416992, 260.75164794921875, 31.01679229736328, 48.143943786621094, 71.29373168945312, 151.76177978515625, 58.622413635253906, 33.70343780517578, 156.6363983154297, -2.339824676513672, -264.2123107910156, -225.57867431640625, 271.09130859375, 4.728998184204102, -169.40673828125, -73.0488052368164, -30.364273071289062, -102.5185546875, -36.17827606201172, -46.82672882080078, -19.1904239654541, -36.03020477294922, -189.84600830078125, 423.4201354980469, 181.144775390625, 236.0634765625, 13.735038757324219, 83.18285369873047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000266.npy"}
|
|
{"epoch": 0.39060205580029367, "step": 267, "batch_size": 64, "mean": 134.9067840576172, "std": 204.47283935546875, "min": -485.0990295410156, "p10": -69.78702926635742, "median": 109.8705940246582, "p90": 343.85541992187507, "max": 718.05322265625, "pos_frac": 0.796875, "sample": [90.5652847290039, 204.92425537109375, 352.1053466796875, -4.463459014892578, 85.33807373046875, -15.22120475769043, 277.81927490234375, 168.1767578125, 18.9481201171875, 50.5809211730957, -93.1339111328125, -86.38967895507812, 11.92486572265625, 324.6055908203125, 149.0611572265625, 120.29634857177734, -72.18636322021484, 212.69882202148438, 486.67572021484375, 718.05322265625, 95.56803894042969, 92.0768814086914, 127.64217376708984, 5.2046966552734375, -141.6077423095703, 94.90657806396484, 77.61004638671875, 77.6037368774414, 318.314453125, 210.9810028076172, 565.817138671875, 406.1628723144531, 665.9906005859375, 40.446380615234375, -13.82950210571289, 251.9872283935547, 4.344554901123047, -87.4034194946289, 4.671733856201172, 139.55551147460938, 287.3945617675781, 19.035781860351562, -6.419893264770508, 560.0572509765625, -485.0990295410156, -31.74919891357422, -361.806640625, 191.28692626953125, 198.87066650390625, 118.84231567382812, 200.05226135253906, 211.08908081054688, 98.53001403808594, 273.4106140136719, 56.47455596923828, 147.58468627929688, -64.18858337402344, 294.0720520019531, 64.73011016845703, 118.68899536132812, 292.7445373535156, 139.40744018554688, 273.5516662597656, 101.05219268798828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000267.npy"}
|
|
{"epoch": 0.3920704845814978, "step": 268, "batch_size": 64, "mean": 142.48526000976562, "std": 207.6282196044922, "min": -342.66015625, "p10": -44.1714458465576, "median": 126.06000518798828, "p90": 359.5283081054688, "max": 761.3951416015625, "pos_frac": 0.8125, "sample": [18.21765899658203, 479.973876953125, 69.64456939697266, -18.219261169433594, 64.08549499511719, 60.62323760986328, 159.29019165039062, 87.31082153320312, 363.0863037109375, 106.17477416992188, 171.56161499023438, -310.3543701171875, 146.31729125976562, 62.62425231933594, 262.29364013671875, -232.87521362304688, 89.70952606201172, 47.60235595703125, -294.6347961425781, 137.6925811767578, 165.2232666015625, 761.3951416015625, 98.75868225097656, 150.20562744140625, 74.16737365722656, 287.15179443359375, 498.9560546875, 534.2427978515625, -29.155872344970703, 332.5841064453125, -71.79706573486328, 67.64625549316406, -69.41796875, 5.409511566162109, 636.2422485351562, 154.50221252441406, 202.06695556640625, 145.51148986816406, 248.57232666015625, 222.18455505371094, 131.63174438476562, 264.00634765625, 18.622520446777344, -50.21302795410156, 308.889404296875, 12.82379150390625, 180.12303161621094, 316.009765625, 4.9869384765625, 257.0393371582031, 177.86663818359375, 120.48826599121094, 663.7533569335938, 189.79229736328125, 64.48092651367188, 100.72431945800781, 110.76313018798828, -5.079460144042969, 351.226318359375, -15.240583419799805, 232.9541015625, -342.66015625, -30.074420928955078, 171.5684051513672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000268.npy"}
|
|
{"epoch": 0.3935389133627019, "step": 269, "batch_size": 64, "mean": 173.07493591308594, "std": 224.96292114257812, "min": -403.6544494628906, "p10": -79.86783905029297, "median": 171.82573699951172, "p90": 451.83419799804693, "max": 705.7399291992188, "pos_frac": 0.75, "sample": [135.76829528808594, 328.0625915527344, 150.906494140625, 276.78497314453125, -91.59209442138672, 197.3472900390625, 296.56182861328125, 153.51156616210938, -80.31503295898438, -42.35425567626953, 294.8567810058594, -3.4139404296875, 455.2379455566406, 11.08404541015625, 306.2744140625, 284.49176025390625, -12.938575744628906, 190.13990783691406, -61.09715270996094, -403.6544494628906, 291.9317626953125, 17.554279327392578, 211.8466796875, -227.75048828125, -78.82438659667969, 383.51812744140625, 15.948097229003906, 631.5399169921875, 499.08880615234375, 211.79437255859375, 152.035888671875, 57.03070068359375, 521.4175415039062, 301.6248779296875, -16.721878051757812, 325.0858154296875, 441.9279479980469, 99.86552429199219, 345.00714111328125, 2.009521484375, 1.3404617309570312, 145.3451385498047, -194.75682067871094, 28.633453369140625, 123.02510070800781, 589.0582275390625, 302.076904296875, 513.9859008789062, 272.2279357910156, -24.955825805664062, 320.3723449707031, 263.464599609375, -8.49310302734375, 705.7399291992188, -126.65951538085938, 443.8921203613281, 3.78778076171875, 44.22191619873047, -39.8879508972168, 300.42266845703125, 426.9295654296875, 404.3490905761719, -179.2135467529297, 190.29705810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000269.npy"}
|
|
{"epoch": 0.39500734214390604, "step": 270, "batch_size": 64, "mean": 125.37371826171875, "std": 195.267578125, "min": -424.5915222167969, "p10": -75.3889404296875, "median": 116.11680221557617, "p90": 351.4475830078125, "max": 710.8721923828125, "pos_frac": 0.8125, "sample": [213.49050903320312, 324.4560546875, 433.050048828125, -270.1706237792969, -424.5915222167969, -2.4582977294921875, 33.747093200683594, 251.37808227539062, 109.73345184326172, 301.5246887207031, 346.467041015625, -46.357757568359375, 128.18167114257812, 185.21763610839844, 2.598724365234375, 62.35668182373047, 92.81551361083984, 131.7134552001953, -148.51736450195312, 220.39639282226562, 287.56103515625, 152.34478759765625, 7.183174133300781, 226.25131225585938, 435.13671875, 279.1471252441406, 16.0687255859375, 103.25562286376953, 208.38418579101562, 116.54984283447266, -284.4937744140625, 366.3900146484375, 710.8721923828125, -78.69618225097656, -93.5965576171875, 250.34417724609375, 349.0474853515625, 31.66167640686035, 110.95048522949219, 14.49648666381836, 552.08203125, -214.019287109375, 155.14202880859375, 21.512447357177734, 150.26736450195312, 115.68376159667969, 29.921005249023438, 232.8250732421875, -67.67204284667969, -18.245342254638672, 210.64404296875, 287.8501281738281, 54.15723419189453, 18.979536056518555, 23.018753051757812, 118.53512573242188, 6.858795166015625, 56.10227966308594, -20.51593780517578, 352.4761962890625, 160.82582092285156, 140.12368774414062, 62.31107711791992, 441.16412353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000270.npy"}
|
|
{"epoch": 0.3964757709251101, "step": 271, "batch_size": 64, "mean": 154.854248046875, "std": 176.26657104492188, "min": -130.7291259765625, "p10": -23.587857246398926, "median": 107.76160430908203, "p90": 415.420281982422, "max": 675.0081176757812, "pos_frac": 0.796875, "sample": [675.0081176757812, 357.60205078125, 104.12223052978516, -15.608837127685547, 215.2474822998047, -28.359912872314453, 356.2643127441406, 373.6837463378906, 34.68073272705078, -42.77631378173828, 284.71368408203125, -130.7291259765625, 429.923828125, -107.09333038330078, 86.51018524169922, -23.96851348876953, 120.31210327148438, -10.504112243652344, 107.43443298339844, -3.117645263671875, -82.66830444335938, 14.318267822265625, 455.20318603515625, 337.8095703125, 487.6854248046875, 381.57867431640625, 79.35958099365234, -22.69965934753418, 84.39755249023438, 127.60295867919922, -8.095195770263672, 187.80638122558594, 99.607666015625, 8.186759948730469, -83.55722045898438, 124.05462646484375, 62.34405517578125, 2.8976669311523438, 371.9088439941406, 74.34553527832031, 7.939216613769531, 519.7882690429688, 44.48902130126953, -14.30345344543457, 129.03709411621094, 40.934017181396484, 108.08877563476562, 144.23680114746094, 325.5187683105469, 195.17544555664062, 455.082275390625, 248.18905639648438, 104.40279388427734, 38.72621154785156, 441.29534912109375, 6.403358459472656, 231.80706787109375, 339.5733337402344, 65.37518310546875, 228.5391082763672, 159.1190185546875, 275.25445556640625, 139.83697509765625, 190.7328338623047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000271.npy"}
|
|
{"epoch": 0.39794419970631423, "step": 272, "batch_size": 64, "mean": 188.56448364257812, "std": 181.40406799316406, "min": -185.11183166503906, "p10": -59.958739471435536, "median": 192.63511657714844, "p90": 416.173486328125, "max": 618.6817626953125, "pos_frac": 0.828125, "sample": [294.76263427734375, 234.24520874023438, -13.40165901184082, -105.617919921875, -26.178146362304688, 325.1820068359375, 226.0143280029297, 417.9644775390625, 422.44244384765625, 304.9771728515625, 321.5752868652344, 383.8498229980469, -69.83806610107422, 157.05914306640625, -164.62213134765625, 66.14385986328125, 89.56023406982422, 192.1554718017578, 179.34030151367188, 154.4393310546875, -65.27957153320312, 284.50701904296875, 310.6584777832031, 207.2618408203125, 324.2593994140625, 121.3205337524414, 0.7661628723144531, 278.8727111816406, 113.37650299072266, 600.9951171875, 39.82518005371094, 186.1311798095703, 193.11476135253906, 154.12594604492188, 42.26127624511719, 376.5932922363281, 442.2773742675781, -185.11183166503906, 198.47845458984375, 228.08868408203125, 618.6817626953125, 17.333831787109375, 503.8885498046875, 143.9298553466797, 305.0562744140625, -78.80884552001953, 290.36328125, 36.9872932434082, 226.68431091308594, 379.0953063964844, -38.92066955566406, 208.36392211914062, 5.69561767578125, -98.77317810058594, -47.54346466064453, 285.28314208984375, 251.68408203125, 69.14067840576172, 400.126708984375, 147.94482421875, 174.119140625, 411.9945068359375, 446.9162292480469, 166.30767822265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000272.npy"}
|
|
{"epoch": 0.39941262848751835, "step": 273, "batch_size": 64, "mean": 185.6915283203125, "std": 198.50830078125, "min": -267.08868408203125, "p10": -71.54261245727538, "median": 170.5712661743164, "p90": 437.0334411621095, "max": 666.9568481445312, "pos_frac": 0.8125, "sample": [243.0594482421875, 303.793212890625, 65.52462768554688, 171.56478881835938, 570.9861450195312, 479.0562744140625, 154.61080932617188, -72.64087677001953, 598.6105346679688, -68.97999572753906, 409.9940185546875, 193.9339599609375, -32.481597900390625, 88.38262176513672, 8.242341995239258, 146.38629150390625, 2.7190208435058594, 199.184326171875, 195.15194702148438, 347.4300537109375, 217.3047637939453, 42.38676452636719, 66.658935546875, 169.57774353027344, 88.28874206542969, 71.23511505126953, 346.2392578125, 154.79708862304688, 167.20986938476562, 185.77096557617188, 314.34588623046875, -18.381431579589844, 370.7090148925781, 350.25384521484375, -22.953035354614258, 603.4659423828125, 538.0652465820312, 240.52879333496094, 176.68711853027344, 282.47344970703125, -163.37347412109375, -115.90032958984375, 276.4932861328125, -75.1389389038086, 257.4095458984375, 318.1490478515625, 357.4544677734375, 52.5849609375, 123.67601013183594, -267.08868408203125, 666.9568481445312, 132.36599731445312, 102.71464538574219, -80.10113525390625, -7.560821533203125, 338.8234558105469, 15.642044067382812, 99.16375732421875, 289.15325927734375, -73.56520080566406, 334.6429443359375, 115.54122161865234, 388.4008483886719, 448.62176513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000273.npy"}
|
|
{"epoch": 0.4008810572687225, "step": 274, "batch_size": 64, "mean": 137.7598876953125, "std": 183.34228515625, "min": -219.91419982910156, "p10": -91.91508026123046, "median": 121.70694732666016, "p90": 415.1996582031252, "max": 512.55810546875, "pos_frac": 0.75, "sample": [56.073829650878906, 75.38705444335938, 36.75303649902344, -154.2250518798828, 222.56488037109375, 168.4761962890625, 224.73008728027344, -27.20258331298828, 117.23455047607422, -31.784698486328125, -125.10250854492188, 457.2338562011719, 315.91741943359375, 219.40509033203125, 233.03701782226562, 149.4137725830078, 20.687889099121094, 453.3565673828125, -219.91419982910156, 289.6108093261719, -86.30928039550781, 115.38624572753906, -62.534454345703125, 67.19436645507812, 61.55718231201172, 259.197998046875, 494.1496887207031, -94.31756591796875, 119.10091400146484, 512.1041259765625, -44.29933547973633, 119.18313598632812, 339.7173767089844, 147.3948974609375, 20.51873207092285, 148.42970275878906, 433.889892578125, -66.66073608398438, 162.10792541503906, 94.99742126464844, 282.1814880371094, -23.768692016601562, 118.59156799316406, 255.3621063232422, 368.74676513671875, -74.4572982788086, 124.23075866699219, 29.730947494506836, 169.90167236328125, 512.55810546875, 277.2360534667969, 116.39759826660156, 3.2948074340820312, 157.713623046875, -163.65869140625, 313.30322265625, -141.017822265625, -111.39535522460938, 506.2310791015625, 199.24777221679688, 124.969482421875, 371.589111328125, -12.210418701171875, 189.39413452148438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000274.npy"}
|
|
{"epoch": 0.4023494860499266, "step": 275, "batch_size": 64, "mean": 148.71762084960938, "std": 182.68162536621094, "min": -224.30813598632812, "p10": -69.08231048583983, "median": 146.82706451416016, "p90": 394.8623260498047, "max": 550.0008544921875, "pos_frac": 0.796875, "sample": [180.8595733642578, 62.78117370605469, 278.3246154785156, 8.906253814697266, 10.976568222045898, -78.91230010986328, 361.0611877441406, 134.6954803466797, 480.5027770996094, -82.46404266357422, 264.9060974121094, 460.7295227050781, 509.36474609375, 81.54816436767578, 158.53036499023438, 223.3612823486328, -32.612552642822266, 3.49493408203125, 132.8076171875, 189.7936553955078, -134.96673583984375, 227.59458923339844, 400.3073425292969, -16.92858123779297, 51.346229553222656, -191.21646118164062, 284.47027587890625, 165.28369140625, 176.34072875976562, 77.09581756591797, -94.55846405029297, 26.402297973632812, 332.9693603515625, 246.36312866210938, 5.381589889526367, 20.690444946289062, -195.15786743164062, -41.610504150390625, -39.41761016845703, 34.93913269042969, 75.93008422851562, 280.30377197265625, -224.30813598632812, 168.33033752441406, 248.24082946777344, 382.15728759765625, 207.084716796875, 550.0008544921875, 179.4649200439453, 461.6917724609375, 436.22149658203125, -22.933549880981445, 243.96884155273438, 365.3583984375, 61.578468322753906, 297.5687561035156, 135.12376403808594, 343.0783996582031, -46.145668029785156, 204.41018676757812, 35.630638122558594, 313.825439453125, 82.385498046875, 54.977630615234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000275.npy"}
|
|
{"epoch": 0.40381791483113066, "step": 276, "batch_size": 64, "mean": 148.5008544921875, "std": 199.32806396484375, "min": -240.4735107421875, "p10": -91.3485458374023, "median": 154.88316345214844, "p90": 401.8749908447267, "max": 798.1087036132812, "pos_frac": 0.765625, "sample": [110.11258697509766, 798.1087036132812, 152.4369659423828, 416.10321044921875, 491.05255126953125, -3.3592166900634766, -5.4404296875, 512.327880859375, 14.285078048706055, 152.65284729003906, 157.1134796142578, 29.93506622314453, -26.13861083984375, 234.59878540039062, 273.60699462890625, 240.868408203125, 194.25009155273438, -228.29946899414062, 151.9206085205078, 15.549747467041016, 34.78080368041992, 302.7705078125, 179.6354217529297, 289.51837158203125, -108.75321960449219, -240.4735107421875, -31.767902374267578, 87.13804626464844, 217.3872833251953, 338.6398010253906, -50.737640380859375, 184.4895477294922, 58.93163299560547, 192.88998413085938, 332.01312255859375, 281.4653015136719, -143.16726684570312, -221.6210479736328, 1.725320816040039, -18.403568267822266, -126.37509155273438, -30.46820068359375, 57.85944747924805, 368.6758117675781, 75.3163833618164, 164.64642333984375, 187.0540008544922, 217.7423858642578, 267.1458740234375, 59.67255401611328, 438.02105712890625, -36.45066833496094, 165.6609649658203, 276.7261047363281, 76.0560302734375, 239.0835723876953, 222.8949432373047, 178.03839111328125, 361.67327880859375, 77.90145874023438, -147.16482543945312, 589.164794921875, 36.45501708984375, 416.5782470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000276.npy"}
|
|
{"epoch": 0.4052863436123348, "step": 277, "batch_size": 64, "mean": 156.3212890625, "std": 202.14923095703125, "min": -280.46435546875, "p10": -106.0804618835449, "median": 141.84002685546875, "p90": 459.7262786865235, "max": 606.288818359375, "pos_frac": 0.765625, "sample": [-0.16156387329101562, 371.94525146484375, -153.56715393066406, 139.07012939453125, 53.787776947021484, 248.94268798828125, 351.29632568359375, 144.60992431640625, -77.04188537597656, 187.26708984375, 249.4940185546875, -42.728248596191406, 335.6636962890625, -43.566383361816406, 240.3054962158203, 355.947021484375, 70.01085662841797, 144.72317504882812, 198.5430450439453, 307.87103271484375, -118.52556610107422, 357.3395080566406, 95.62901306152344, 81.1429214477539, -186.00497436523438, -26.552955627441406, 492.05841064453125, 10.026153564453125, -19.600921630859375, 94.82061767578125, -280.46435546875, 496.22528076171875, 164.66741943359375, 300.0782165527344, -245.7745361328125, -163.30477905273438, 606.288818359375, 276.660888671875, 146.59463500976562, -6.49932861328125, 231.51580810546875, 15.806838989257812, 499.90234375, 55.15272903442383, 11.77142333984375, 117.35230255126953, 217.69992065429688, 382.5531005859375, 449.82611083984375, 475.45867919921875, -152.5955810546875, 277.1455993652344, 77.9637222290039, 75.61945343017578, 11.692062377929688, -0.7522087097167969, 255.6286163330078, 118.75177764892578, 463.9692077636719, 474.025634765625, 117.49179077148438, 176.3357391357422, 99.58954620361328, 395.4415588378906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000277.npy"}
|
|
{"epoch": 0.4067547723935389, "step": 278, "batch_size": 64, "mean": 178.59414672851562, "std": 198.81155395507812, "min": -138.64137268066406, "p10": -49.43115043640135, "median": 138.73369598388672, "p90": 472.77458190917974, "max": 608.4790649414062, "pos_frac": 0.765625, "sample": [-117.75872802734375, 398.9120788574219, -82.77291107177734, 49.12741470336914, 285.5068054199219, 335.02313232421875, 203.69937133789062, 60.5097770690918, 153.7920684814453, 107.95276641845703, 237.47396850585938, 201.20957946777344, 23.91400718688965, -24.451080322265625, 114.2203369140625, 284.51104736328125, -25.427993774414062, 326.03228759765625, 460.5507507324219, 47.22264862060547, -10.72227668762207, 574.4598999023438, 401.74066162109375, 205.888427734375, 241.52151489257812, 327.83001708984375, 104.49613189697266, -11.337882995605469, 151.67013549804688, 531.5180053710938, 441.2208557128906, -67.77760314941406, 113.91848754882812, -25.831298828125, 567.2404174804688, 478.01336669921875, 17.559959411621094, -20.48175048828125, 142.90863037109375, 228.05056762695312, 103.00501251220703, 350.76019287109375, 191.37432861328125, 68.58221435546875, 40.358245849609375, 531.065673828125, 123.2339859008789, 255.87709045410156, -54.52011489868164, 580.3096313476562, 184.11708068847656, 134.5587615966797, 47.227783203125, 28.72173309326172, 340.1633605957031, 299.043212890625, -37.55690002441406, -18.454885482788086, 608.4790649414062, -138.64137268066406, 424.0128479003906, 132.88394165039062, -125.80441284179688, -69.90455627441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000278.npy"}
|
|
{"epoch": 0.40822320117474303, "step": 279, "batch_size": 64, "mean": 172.298583984375, "std": 230.1483154296875, "min": -350.374755859375, "p10": -63.445133972167966, "median": 148.60889434814453, "p90": 472.3817047119141, "max": 849.2435302734375, "pos_frac": 0.765625, "sample": [67.76362609863281, 74.95360565185547, 30.135242462158203, 43.777374267578125, -0.896820068359375, -60.413352966308594, 407.0142822265625, 475.3942565917969, -3.7512893676757812, -53.382564544677734, 3.9839401245117188, 238.2412872314453, 171.78192138671875, 36.27268981933594, 493.3965148925781, 590.1666259765625, 66.14442443847656, 179.245361328125, 277.21478271484375, 275.0309143066406, 656.4932861328125, 77.21825408935547, 13.748266220092773, -246.57485961914062, 62.20872497558594, 59.07404327392578, -20.393310546875, 166.079833984375, 86.96456909179688, 539.8953857421875, 108.95269775390625, 41.966331481933594, 218.6536865234375, -64.74446868896484, 267.8547668457031, 465.3524169921875, 244.35891723632812, 849.2435302734375, 496.724365234375, 304.5892333984375, -43.29637145996094, -43.8328857421875, -154.31411743164062, -350.374755859375, 156.41586303710938, 430.5660705566406, 94.60565185546875, 226.6034698486328, 378.18255615234375, 140.8019256591797, 397.6475830078125, 334.00115966796875, 70.3415298461914, -92.09683227539062, 464.1505432128906, 217.89068603515625, 458.13177490234375, 245.56285095214844, -118.046630859375, -229.05006408691406, 357.55072021484375, 208.07559204101562, -19.402912139892578, 257.2574462890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000279.npy"}
|
|
{"epoch": 0.40969162995594716, "step": 280, "batch_size": 64, "mean": 185.492431640625, "std": 194.63612365722656, "min": -308.223876953125, "p10": -72.43897247314453, "median": 174.5429916381836, "p90": 417.4059112548828, "max": 604.3931274414062, "pos_frac": 0.765625, "sample": [118.63014221191406, 305.63275146484375, 160.64700317382812, 235.6936492919922, -12.717697143554688, -44.97248077392578, 398.3603210449219, 503.082275390625, 498.22747802734375, 51.29278564453125, 233.38856506347656, -15.259635925292969, -60.14686965942383, 417.9215087890625, 416.2028503417969, 393.048583984375, 135.2284698486328, 281.742919921875, 159.54420471191406, 303.5212097167969, 108.21922302246094, 512.1751708984375, 402.62890625, 220.0813751220703, 231.12576293945312, 91.76763916015625, 371.16424560546875, 325.71435546875, -3.95037841796875, 235.08001708984375, 155.00527954101562, 153.312744140625, 523.2154541015625, 128.55453491210938, -65.65713500976562, 170.85594177246094, -142.772216796875, 387.8056945800781, 258.59930419921875, 60.684608459472656, 265.2461242675781, 122.33264923095703, -85.05125427246094, 604.3931274414062, -92.49519348144531, -308.223876953125, 124.9289321899414, 477.9390563964844, 178.23004150390625, 134.55645751953125, 190.9436492919922, 408.7120361328125, -75.34547424316406, 179.3138885498047, -33.552337646484375, -90.65935516357422, 201.3316650390625, 159.75511169433594, 82.36740112304688, 347.0078125, 353.44146728515625, -149.99917602539062, -28.523117065429688, 302.18768310546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000280.npy"}
|
|
{"epoch": 0.4111600587371512, "step": 281, "batch_size": 64, "mean": 167.29629516601562, "std": 194.29412841796875, "min": -629.353515625, "p10": -28.54589118957517, "median": 161.98432159423828, "p90": 409.01147460937506, "max": 592.7565307617188, "pos_frac": 0.875, "sample": [125.77974700927734, 292.70343017578125, 137.98345947265625, 242.09486389160156, 58.08340835571289, 224.92501831054688, 24.588912963867188, 186.66796875, 174.1100616455078, 52.51628875732422, 256.19085693359375, 385.32244873046875, 267.64166259765625, 69.04297637939453, 245.5022735595703, -83.04705810546875, 186.02548217773438, 537.2951049804688, 66.30070495605469, 149.85858154296875, 34.186553955078125, 391.8079833984375, 284.5826416015625, 592.7565307617188, 234.46920776367188, 291.2095642089844, 181.41004943847656, 326.05999755859375, -39.38411331176758, 584.6779174804688, 300.3790588378906, 87.83978271484375, 204.10757446289062, 416.3843994140625, 188.77728271484375, -629.353515625, 91.34663391113281, 59.13887023925781, 42.09966278076172, 77.45037841796875, 494.80670166015625, 19.094772338867188, 8.884353637695312, 270.07952880859375, 176.733642578125, 252.34686279296875, 48.47803497314453, 105.78977966308594, 496.4046630859375, 87.28524017333984, -91.88285064697266, 135.74334716796875, -143.2393341064453, 90.10919189453125, -50.03070068359375, -3.2567062377929688, -58.462646484375, 380.2032470703125, 233.63185119628906, 81.97969818115234, 145.062255859375, 280.3582763671875, 7.378416061401367, 419.9326477050781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000281.npy"}
|
|
{"epoch": 0.41262848751835535, "step": 282, "batch_size": 64, "mean": 129.69415283203125, "std": 220.09547424316406, "min": -235.2471923828125, "p10": -110.71448211669922, "median": 114.64042282104492, "p90": 364.25322570800785, "max": 1235.080810546875, "pos_frac": 0.765625, "sample": [459.23974609375, 156.32591247558594, 66.79872131347656, -206.27703857421875, 183.8607635498047, 241.634033203125, 99.785888671875, 22.133865356445312, 128.552978515625, 372.37701416015625, 261.7898254394531, 219.27899169921875, 364.8630676269531, 100.61710357666016, -35.76756286621094, 226.462158203125, 1235.080810546875, 170.59103393554688, 177.72805786132812, 116.86187744140625, 147.27734375, 224.56887817382812, 14.319927215576172, -151.60720825195312, 511.37890625, 390.36334228515625, 66.66830444335938, 162.27696228027344, 154.55300903320312, 509.34466552734375, 283.71612548828125, 198.72122192382812, 279.8090515136719, 7.8235321044921875, -30.248096466064453, 151.02963256835938, -97.92832946777344, -104.9188461303711, 66.74755859375, -143.0244140625, -23.3079833984375, 37.09783935546875, 112.4189682006836, 137.84207153320312, 258.0120849609375, 7.279884338378906, -130.80276489257812, 344.9171142578125, 216.57867431640625, 123.62161254882812, -82.5706558227539, -68.06724548339844, 25.554885864257812, 52.39822769165039, 90.89976501464844, -171.10833740234375, 279.40850830078125, 24.112897872924805, -113.19832611083984, 362.83026123046875, -235.2471923828125, 40.75852584838867, -31.7314395904541, 39.91947937011719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000282.npy"}
|
|
{"epoch": 0.41409691629955947, "step": 283, "batch_size": 64, "mean": 200.64541625976562, "std": 209.287353515625, "min": -440.1202392578125, "p10": -47.55516815185546, "median": 190.51596069335938, "p90": 488.37937622070314, "max": 636.6910400390625, "pos_frac": 0.828125, "sample": [-63.71481704711914, -75.8042221069336, 224.1058807373047, 26.919340133666992, 378.6461181640625, 333.47235107421875, -57.42376708984375, 240.3588104248047, 401.9041442871094, 38.785911560058594, 63.139915466308594, 142.63206481933594, -24.872737884521484, 85.49452209472656, 104.36893463134766, 224.0353546142578, -440.1202392578125, 62.1422233581543, 418.45538330078125, -57.86803436279297, 502.0285949707031, 455.16278076171875, 191.0099334716797, 329.1278076171875, 47.3942985534668, 164.17398071289062, -50.47911071777344, 489.579833984375, 350.22735595703125, 508.91741943359375, 47.326629638671875, 290.2384338378906, -155.2294158935547, 205.80142211914062, 477.19366455078125, 77.07432556152344, 84.67062377929688, -8.969532012939453, 83.4926528930664, -40.732635498046875, 540.6580200195312, 190.02198791503906, 327.6020202636719, 213.023681640625, 284.1288757324219, 178.79779052734375, 92.29033660888672, 223.27841186523438, 427.84088134765625, 322.4447326660156, 580.5191650390625, 16.583396911621094, 375.35223388671875, 176.249267578125, 66.38797760009766, 293.7470397949219, 548.047607421875, 201.87123107910156, 407.06817626953125, 636.6910400390625, 485.57830810546875, 113.18860626220703, 101.7398452758789, -34.47096252441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000283.npy"}
|
|
{"epoch": 0.4155653450807636, "step": 284, "batch_size": 64, "mean": 183.906494140625, "std": 199.7742156982422, "min": -204.06301879882812, "p10": -38.084731292724605, "median": 165.7076187133789, "p90": 452.7003204345704, "max": 753.590576171875, "pos_frac": 0.875, "sample": [-88.16464233398438, 107.01260375976562, 318.18798828125, 39.531494140625, 104.47791290283203, 49.69678497314453, 341.09991455078125, 18.34429168701172, 257.10211181640625, 428.981689453125, 363.05035400390625, -54.793582916259766, 462.8654479980469, 27.682764053344727, 279.4590148925781, 56.9095344543457, 32.181312561035156, 245.62960815429688, 312.87445068359375, 504.6548767089844, 56.710693359375, 649.7744750976562, 173.08306884765625, 286.5901184082031, 141.2330322265625, 21.864486694335938, 158.33216857910156, 89.3819580078125, 287.9736328125, -36.34788513183594, 26.752334594726562, 697.5629272460938, 189.81561279296875, -56.190330505371094, 207.50711059570312, 91.4444580078125, -83.7505874633789, 65.54064178466797, 218.88592529296875, 753.590576171875, 73.52458190917969, 186.58438110351562, 194.35745239257812, 26.055543899536133, -89.5816650390625, -204.06301879882812, 295.63372802734375, 301.1319274902344, 131.0217742919922, 252.95802307128906, 568.8989868164062, 177.70782470703125, 306.5960693359375, 226.1791229248047, 38.315673828125, 10.938159942626953, -38.82909393310547, 6.492042541503906, 371.55181884765625, 248.9883270263672, 27.838600158691406, 507.0194091796875, 28.092918395996094, 376.0650329589844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000284.npy"}
|
|
{"epoch": 0.4170337738619677, "step": 285, "batch_size": 64, "mean": 157.22454833984375, "std": 190.25119018554688, "min": -491.768310546875, "p10": -59.79582595825194, "median": 149.9861068725586, "p90": 380.6869567871094, "max": 598.0444946289062, "pos_frac": 0.84375, "sample": [366.9393310546875, 245.19161987304688, 45.83740997314453, 386.57879638671875, 124.6105728149414, 122.54119873046875, 140.64175415039062, 82.35968780517578, 160.15440368652344, 559.155517578125, 293.55908203125, 152.04359436035156, 320.9173278808594, 116.49689483642578, 129.90296936035156, 552.4676513671875, -17.5018310546875, 109.35696411132812, 145.4490966796875, 238.41854858398438, 422.2738342285156, -233.79879760742188, 57.888389587402344, 309.1543273925781, 411.6089782714844, 6.212409973144531, -65.2474365234375, 92.96881103515625, 92.85760498046875, 46.156898498535156, 257.07513427734375, 208.6230926513672, -93.93180084228516, 54.15511703491211, 274.8907775878906, 321.906494140625, 23.084388732910156, -491.768310546875, 239.83303833007812, 35.733673095703125, 159.1700897216797, 1.447174072265625, 598.0444946289062, 272.8970642089844, -47.075401306152344, 308.72442626953125, 230.20782470703125, 133.74154663085938, 229.98037719726562, 331.4783630371094, 168.12527465820312, -141.86131286621094, 288.7049255371094, 191.95315551757812, -165.10182189941406, -32.42282485961914, 400.199462890625, 90.18489074707031, 95.83971405029297, 203.7449188232422, -157.7315673828125, 328.373046875, 181.020751953125, 147.92861938476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000285.npy"}
|
|
{"epoch": 0.4185022026431718, "step": 286, "batch_size": 64, "mean": 177.40138244628906, "std": 222.14573669433594, "min": -350.8638610839844, "p10": -93.3379135131836, "median": 183.7704315185547, "p90": 477.2913330078125, "max": 662.489013671875, "pos_frac": 0.78125, "sample": [62.529563903808594, -80.54180145263672, 503.5749816894531, 173.8790740966797, 167.796875, 217.8358154296875, 31.85314178466797, 210.5685272216797, 324.7898254394531, -350.8638610839844, 296.3067932128906, 248.96255493164062, 40.22050476074219, -67.7086410522461, 411.9643859863281, 193.3760986328125, 559.6024169921875, 6.363616943359375, 205.45101928710938, 175.8138427734375, 481.42486572265625, 57.065826416015625, -297.5858154296875, 450.38482666015625, 387.39202880859375, -124.61956787109375, -95.57379150390625, 348.33856201171875, 315.0783386230469, 549.125244140625, 502.16497802734375, -152.009033203125, 214.718994140625, 105.86074829101562, -33.59260559082031, 78.39955139160156, 374.8265380859375, 143.9893035888672, 46.812225341796875, -80.98204040527344, 161.4745330810547, -68.78874969482422, 193.591064453125, -88.12086486816406, 112.39724731445312, 400.15289306640625, 457.57806396484375, 109.7510986328125, -119.95697784423828, 583.429931640625, -69.83438873291016, 244.05789184570312, 69.35159301757812, 467.64642333984375, 64.6923828125, 310.087890625, 191.72702026367188, 241.3475341796875, 271.276611328125, -141.63931274414062, 662.489013671875, 385.9985046386719, 217.76791381835938, 94.2166519165039], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000286.npy"}
|
|
{"epoch": 0.4199706314243759, "step": 287, "batch_size": 64, "mean": 148.47613525390625, "std": 196.89947509765625, "min": -267.3529357910156, "p10": -59.70682525634764, "median": 131.13842010498047, "p90": 442.34926147460953, "max": 718.28466796875, "pos_frac": 0.796875, "sample": [136.48963928222656, 20.35992431640625, 8.866674423217773, 68.73320770263672, 129.18626403808594, -30.019378662109375, 304.0462341308594, 591.626220703125, 166.8810272216797, 144.41033935546875, 515.653076171875, 91.28474426269531, 196.0498504638672, 513.1119995117188, -47.597015380859375, -64.89674377441406, 36.81847381591797, 517.5424194335938, 140.44647216796875, 326.54486083984375, 177.93356323242188, 6.81085205078125, 139.8153076171875, -267.3529357910156, 718.28466796875, 111.9686279296875, 59.774757385253906, -31.378173828125, -35.97633361816406, -16.066871643066406, 133.090576171875, 4.832298278808594, -145.35842895507812, 107.88999938964844, 55.9699592590332, 232.47129821777344, 64.88357543945312, 237.05711364746094, 135.5165252685547, 256.73236083984375, -10.785707473754883, 291.3516845703125, 183.78646850585938, 106.98146057128906, 26.793319702148438, 125.89651489257812, 78.13018798828125, 17.563339233398438, 459.30224609375, 397.2320556640625, -170.59042358398438, 58.777976989746094, 223.677978515625, 136.9819793701172, 369.6527404785156, -178.75363159179688, -150.8481903076172, -71.11907196044922, 239.09210205078125, 306.8487854003906, 476.3111572265625, 402.79229736328125, 215.4197235107422, 255.54013061523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000287.npy"}
|
|
{"epoch": 0.42143906020558003, "step": 288, "batch_size": 64, "mean": 185.25796508789062, "std": 195.3400115966797, "min": -290.50347900390625, "p10": -2.6403162002563363, "median": 171.15890502929688, "p90": 416.2668395996094, "max": 685.1873779296875, "pos_frac": 0.890625, "sample": [223.82962036132812, 177.17922973632812, -7.433223724365234, 189.25721740722656, 305.920166015625, 417.22314453125, 8.543134689331055, 685.1873779296875, 53.74885559082031, 378.5315246582031, 262.4872131347656, 103.56478881835938, -78.0883560180664, 15.50238037109375, 82.44033813476562, 156.2942657470703, 131.48655700683594, 374.8249816894531, 272.1251220703125, 350.6325988769531, 172.90869140625, 280.3168029785156, 274.87646484375, 13.69830322265625, 88.97636413574219, 238.91061401367188, 238.47409057617188, 60.52326202392578, 169.40911865234375, 72.45606994628906, 184.31570434570312, 227.55450439453125, 555.0787963867188, 159.02679443359375, 414.03546142578125, 288.548095703125, 157.77333068847656, 15.21038818359375, 184.53268432617188, 89.0013198852539, -286.8145751953125, 583.70361328125, 109.8653564453125, 302.239501953125, 74.44313049316406, 168.27584838867188, 73.34488677978516, 51.54450988769531, 582.65576171875, 109.68157196044922, 215.75282287597656, 570.3743286132812, 208.38638305664062, -290.50347900390625, 65.67807006835938, 499.764404296875, 275.6109619140625, 39.01622009277344, 64.14984130859375, 361.5322570800781, -13.979255676269531, 385.32861328125, -199.57899475097656, -82.84548950195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000288.npy"}
|
|
{"epoch": 0.42290748898678415, "step": 289, "batch_size": 64, "mean": 209.41796875, "std": 222.99002075195312, "min": -173.83245849609375, "p10": -47.55567016601562, "median": 149.1488037109375, "p90": 527.4105834960939, "max": 698.21826171875, "pos_frac": 0.859375, "sample": [165.62765502929688, -50.983917236328125, 611.8580932617188, 668.6036376953125, -155.6293487548828, 566.8831787109375, 430.0162353515625, 318.1545104980469, 81.8175048828125, 597.5146484375, 479.4252014160156, 416.9608154296875, 263.205078125, 506.26873779296875, 367.5694580078125, 274.52789306640625, 38.16087341308594, 698.21826171875, -37.98882293701172, 269.8521728515625, 127.44149780273438, 48.328094482421875, 138.14620971679688, 10.970203399658203, -97.61892700195312, 21.083309173583984, 286.50579833984375, 437.3718566894531, 65.61373901367188, 144.59014892578125, 438.8018493652344, 626.5111694335938, 388.0589904785156, -55.29606246948242, 37.798866271972656, 25.717575073242188, 536.4713745117188, 39.37666702270508, -59.80857849121094, 15.617231369018555, 111.83062744140625, 482.66357421875, -39.556427001953125, 237.87728881835938, 180.23486328125, 415.82684326171875, 429.0880432128906, 2.116395950317383, 140.7063751220703, -110.87483215332031, 141.2711944580078, 153.70745849609375, 99.38119506835938, 35.98353576660156, 469.50048828125, 178.25653076171875, 37.795135498046875, 27.35222625732422, 101.30758666992188, 223.73373413085938, 259.9410095214844, -173.83245849609375, 250.17813110351562, 62.51903533935547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000289.npy"}
|
|
{"epoch": 0.4243759177679883, "step": 290, "batch_size": 64, "mean": 210.49253845214844, "std": 255.2123565673828, "min": -334.87432861328125, "p10": -68.56448745727539, "median": 194.30931854248047, "p90": 596.8032104492188, "max": 951.947509765625, "pos_frac": 0.828125, "sample": [510.72784423828125, 171.73513793945312, 106.86881256103516, 164.0107421875, -334.87432861328125, 378.2066345214844, 439.897216796875, -70.02755737304688, 28.117324829101562, 130.7261505126953, -197.12631225585938, 377.3943786621094, 241.20306396484375, 40.586090087890625, 233.1905517578125, 225.98214721679688, 551.0110473632812, -33.783111572265625, 223.57070922851562, 19.93472671508789, 59.17958068847656, 71.54197692871094, -185.48138427734375, 157.08740234375, 114.005126953125, -255.63897705078125, 167.04766845703125, 319.8685302734375, 951.947509765625, 685.9820556640625, 71.01557922363281, 231.35072326660156, 301.24591064453125, 618.7108154296875, 193.0584716796875, 65.74165344238281, 293.5506286621094, 110.63932800292969, 336.5488586425781, 239.0670166015625, 37.65618896484375, 213.2917938232422, 195.56016540527344, 298.94635009765625, 35.90324401855469, 278.82928466796875, -65.1506576538086, -23.727327346801758, 323.89697265625, -128.10354614257812, 686.8070068359375, 394.33538818359375, 251.70712280273438, 588.070556640625, 14.839183807373047, 636.1763916015625, 600.5457763671875, 277.8836364746094, -1.3515510559082031, 52.053924560546875, 68.85415649414062, 822.058837890625, 240.67117309570312, -82.05105590820312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000290.npy"}
|
|
{"epoch": 0.42584434654919234, "step": 291, "batch_size": 64, "mean": 190.36480712890625, "std": 222.8323211669922, "min": -414.1673889160156, "p10": -52.285391235351554, "median": 199.96786499023438, "p90": 447.607211303711, "max": 649.671875, "pos_frac": 0.765625, "sample": [207.56236267089844, 192.3733673095703, 9.485671997070312, 25.333139419555664, -9.396743774414062, 277.26031494140625, 620.956298828125, 293.1699523925781, 472.696044921875, 283.8470153808594, 73.63954162597656, 416.60577392578125, 268.3681640625, -26.955223083496094, 649.671875, -45.56121826171875, 435.82672119140625, 242.6634063720703, -18.06292152404785, 242.4158172607422, 442.97216796875, -1.8536529541015625, 309.794677734375, 90.62240600585938, 267.83062744140625, 420.1885986328125, 140.40750122070312, 190.1298065185547, 449.5936584472656, 29.811626434326172, 307.31719970703125, 113.4247817993164, 339.29327392578125, -273.3763427734375, 55.31494140625, -140.86862182617188, -45.943382263183594, 53.108299255371094, -414.1673889160156, 580.5804443359375, 364.5389404296875, 434.7288513183594, 395.58587646484375, 225.40789794921875, -84.69845581054688, -36.175201416015625, -79.02987670898438, 373.65618896484375, 51.40388488769531, 151.10043334960938, 82.26643371582031, 171.12991333007812, 360.269287109375, 311.1983947753906, -11.724456787109375, 218.84579467773438, 417.69732666015625, 70.65754699707031, 538.5852661132812, -55.003395080566406, -151.57693481445312, 373.30108642578125, 533.1115112304688, 1.9916973114013672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000291.npy"}
|
|
{"epoch": 0.42731277533039647, "step": 292, "batch_size": 64, "mean": 203.92965698242188, "std": 262.3307800292969, "min": -653.1588134765625, "p10": -71.30237159729002, "median": 199.55672454833984, "p90": 542.2282531738281, "max": 807.18115234375, "pos_frac": 0.796875, "sample": [237.5003662109375, 30.86966323852539, -7.5624542236328125, 195.82589721679688, 317.9940490722656, 227.86483764648438, 112.8507080078125, 119.46868133544922, 203.20809936523438, -653.1588134765625, 734.9251708984375, 50.059532165527344, 609.6227416992188, 142.612060546875, 339.08941650390625, -82.16793823242188, -269.19940185546875, 452.3919372558594, 60.80487823486328, 325.150634765625, 3.5456924438476562, 756.5081787109375, 412.9684143066406, 218.63174438476562, 451.8395080566406, 541.841552734375, 275.34552001953125, 479.6505126953125, 176.10845947265625, 135.09732055664062, -160.02877807617188, 248.7496337890625, 10.016939163208008, 539.6542358398438, -11.475959777832031, 136.63919067382812, -93.85385131835938, 169.39744567871094, 113.18858337402344, 117.67961120605469, 36.25669860839844, 542.3939819335938, 221.66635131835938, 195.9053497314453, 217.49310302734375, 239.7931365966797, 407.8848571777344, 368.833740234375, 807.18115234375, -28.389284133911133, 183.61111450195312, 322.7503967285156, 242.30760192871094, -224.2810516357422, 254.1982879638672, -195.85150146484375, 728.7909545898438, -45.94938278198242, -40.201656341552734, 571.8243408203125, 211.1774139404297, 236.42861938476562, 138.09657287597656, -10.076700210571289], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000292.npy"}
|
|
{"epoch": 0.4287812041116006, "step": 293, "batch_size": 64, "mean": 167.72970581054688, "std": 258.4353332519531, "min": -333.13763427734375, "p10": -142.8237518310547, "median": 144.8123779296875, "p90": 476.6916961669922, "max": 999.3621826171875, "pos_frac": 0.734375, "sample": [39.07621765136719, 44.9227294921875, 21.893896102905273, 78.24738311767578, 240.82388305664062, 224.21034240722656, 492.1089782714844, 176.93226623535156, 490.11895751953125, 49.87744903564453, 476.8979187011719, -83.20875549316406, 253.179931640625, 15.813236236572266, 999.3621826171875, -260.2066345214844, 295.3479309082031, 182.93704223632812, 107.09255981445312, 304.5707702636719, 391.7350769042969, -144.171142578125, 301.31329345703125, -61.66657257080078, 44.33599853515625, 81.55152130126953, -124.93732452392578, 164.0971221923828, -57.56797790527344, 523.9264526367188, 452.10394287109375, 354.4917297363281, 344.01806640625, 439.4097595214844, 342.1193542480469, -164.2631378173828, 80.81907653808594, 164.53564453125, 231.27407836914062, -39.42717742919922, -110.25493621826172, 81.82950592041016, 7.10174560546875, 258.8061828613281, 324.27166748046875, 398.6826477050781, 256.9227294921875, 329.8837890625, 125.52763366699219, -333.13763427734375, -22.935333251953125, -250.71070861816406, -147.28651428222656, 476.21051025390625, 896.2132568359375, 119.92598724365234, -139.67984008789062, 181.48199462890625, 76.94673156738281, -105.44978332519531, -151.26434326171875, 403.4626159667969, 585.0216674804688, -0.5649662017822266], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000293.npy"}
|
|
{"epoch": 0.4302496328928047, "step": 294, "batch_size": 64, "mean": 180.9390106201172, "std": 252.9709930419922, "min": -326.4017639160156, "p10": -118.647622680664, "median": 113.79232788085938, "p90": 541.6396484375, "max": 899.999755859375, "pos_frac": 0.859375, "sample": [39.68280792236328, 80.87774658203125, 322.6628723144531, 426.7301330566406, 63.13330078125, 30.749187469482422, -152.62045288085938, 142.76907348632812, 5.89093017578125, 56.97296905517578, 18.896896362304688, 27.980026245117188, -51.82442855834961, 580.3214111328125, 36.766746520996094, 317.1308898925781, 180.03501892089844, 446.23388671875, 19.150196075439453, 173.93678283691406, 768.9276123046875, -171.1724090576172, 263.45452880859375, 60.641693115234375, 691.422607421875, 439.5895690917969, 315.69512939453125, 171.0049591064453, 191.03079223632812, -147.2861328125, 289.3864440917969, 322.6746520996094, 311.0080871582031, 899.999755859375, -182.20462036132812, -169.56752014160156, 699.9041748046875, 177.28858947753906, 523.8421630859375, 12.508552551269531, -326.4017639160156, -206.38674926757812, 274.36492919921875, 331.36566162109375, 636.3304443359375, 116.22959899902344, 34.69974136352539, 134.3026123046875, 531.3383178710938, 48.79432678222656, 329.8326416015625, 342.6609802246094, -21.495948791503906, 111.35505676269531, 43.72098922729492, 68.19818115234375, 38.368709564208984, 23.737388610839844, 31.468334197998047, 165.2220458984375, 546.0545043945312, 66.93276977539062, 6.840240478515625, 18.93878746032715], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000294.npy"}
|
|
{"epoch": 0.43171806167400884, "step": 295, "batch_size": 64, "mean": 203.06219482421875, "std": 272.74285888671875, "min": -322.7943115234375, "p10": -150.92278671264646, "median": 197.32894134521484, "p90": 549.9784240722656, "max": 1021.1690673828125, "pos_frac": 0.765625, "sample": [-322.7943115234375, -76.83148193359375, 643.3818359375, 12.060426712036133, -291.65679931640625, 216.96347045898438, -170.9935760498047, 315.4996032714844, 272.7560119628906, -280.3798828125, 367.28009033203125, -48.28575134277344, 191.60317993164062, 399.2694091796875, 457.982421875, 273.9661560058594, 203.05470275878906, 329.2642517089844, -62.431663513183594, 87.92839813232422, 391.34002685546875, -45.201194763183594, 117.4473876953125, 386.74102783203125, 15.507774353027344, 128.4146728515625, 539.9781494140625, -165.83375549316406, 739.4041748046875, 180.38510131835938, 446.7496032714844, 68.56796264648438, -5.115543365478516, -163.94390869140625, 545.92236328125, 581.81201171875, 104.33150482177734, -120.54016876220703, -309.09893798828125, 48.85737609863281, 262.3689270019531, 471.3667297363281, 374.61468505859375, 103.49943542480469, -36.02709197998047, 108.02482604980469, 237.2077178955078, 58.18323516845703, 208.12637329101562, 582.6065673828125, 458.66229248046875, 410.97503662109375, 77.21340942382812, 1021.1690673828125, 551.7167358398438, 448.0397644042969, 150.7298583984375, 229.33731079101562, 237.72482299804688, 206.5956268310547, 73.0083999633789, 98.27424621582031, 661.46875, -2.2685775756835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000295.npy"}
|
|
{"epoch": 0.4331864904552129, "step": 296, "batch_size": 64, "mean": 143.65863037109375, "std": 274.4779968261719, "min": -754.7303466796875, "p10": -224.58747253417968, "median": 144.15277099609375, "p90": 519.9936279296875, "max": 842.971435546875, "pos_frac": 0.78125, "sample": [192.8964385986328, 51.48725128173828, 134.10418701171875, 842.971435546875, 66.50634765625, 522.9747314453125, -253.5963134765625, -114.55769348144531, -55.83332061767578, -754.7303466796875, 122.05972290039062, 262.89654541015625, -359.45294189453125, 191.06321716308594, 173.5232391357422, -312.526611328125, 194.7974090576172, 435.99139404296875, 607.2120361328125, 402.6131591796875, 244.66339111328125, 287.83673095703125, 2.3249740600585938, -115.97825622558594, 112.70388793945312, 399.98095703125, 74.64053344726562, -270.877685546875, 29.7833251953125, -231.98597717285156, 569.9010620117188, 54.94175720214844, 280.4371032714844, 61.122398376464844, 243.35488891601562, 539.9286499023438, 170.63648986816406, 117.21646118164062, -207.3242950439453, 176.00253295898438, -270.24237060546875, 306.0119323730469, -67.8783950805664, -64.33517456054688, 513.0377197265625, 102.23889923095703, 127.80657196044922, 242.77101135253906, 178.623046875, 190.77377319335938, 154.20135498046875, 13.347450256347656, 16.09107208251953, 759.2525024414062, 184.79588317871094, 233.49075317382812, 98.85477447509766, 73.3558578491211, 566.5496215820312, -105.71749877929688, 186.0039520263672, 96.59272003173828, 367.8834228515625, 400.9344177246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000296.npy"}
|
|
{"epoch": 0.434654919236417, "step": 297, "batch_size": 64, "mean": 159.1259002685547, "std": 210.94931030273438, "min": -257.0852355957031, "p10": -62.014016723632814, "median": 123.6854476928711, "p90": 468.7324859619141, "max": 702.2529296875, "pos_frac": 0.765625, "sample": [158.0176544189453, 297.9625244140625, 363.54852294921875, 567.2190551757812, 24.660564422607422, 24.78228187561035, 372.3638916015625, 210.50588989257812, -209.6540069580078, -63.8658332824707, 71.95040893554688, 133.94647216796875, 204.27349853515625, 418.70452880859375, 308.08319091796875, -39.33440399169922, -39.8380126953125, 31.3275146484375, -76.45596313476562, -12.081367492675781, 187.23158264160156, -195.0623321533203, 94.35758972167969, -32.60026550292969, 519.1779174804688, 93.96466064453125, 318.6398010253906, 79.677001953125, 216.0191650390625, 261.0835876464844, 167.74468994140625, 111.27377319335938, 459.49578857421875, 200.66246032714844, 483.69573974609375, 24.216609954833984, 96.99302673339844, 12.610237121582031, 525.2127075195312, 287.4793395996094, 6.3727264404296875, 197.005859375, -257.0852355957031, 359.984130859375, 72.30745697021484, -60.360755920410156, 124.87802124023438, 64.30691528320312, 702.2529296875, -41.74122619628906, 158.26608276367188, 472.6910705566406, 231.3828125, -15.315185546875, -233.12042236328125, 357.5257263183594, -39.335655212402344, 122.49287414550781, 241.94320678710938, 617.3983154296875, 113.48468017578125, 357.70428466796875, -62.722557067871094, 35.75251770019531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000297.npy"}
|
|
{"epoch": 0.43612334801762115, "step": 298, "batch_size": 64, "mean": 210.67361450195312, "std": 304.3543701171875, "min": -510.85504150390625, "p10": -98.88865280151367, "median": 156.20315551757812, "p90": 623.8251098632815, "max": 1336.5654296875, "pos_frac": 0.6875, "sample": [105.90558624267578, -130.54360961914062, 12.672130584716797, -510.85504150390625, 37.844879150390625, -93.1925048828125, 27.690996170043945, 736.6243286132812, 240.62655639648438, -26.49462890625, 180.17919921875, 506.09063720703125, -10.035476684570312, -116.15318298339844, -146.17593383789062, 56.15807342529297, 157.27508544921875, 113.25311279296875, 114.41983795166016, 314.9956359863281, 799.587890625, -8.4471435546875, 404.6700439453125, -188.00909423828125, 640.0811767578125, 337.9031982421875, -97.78889465332031, 506.91204833984375, 585.894287109375, 724.6159057617188, -99.35997772216797, -121.4158935546875, -51.81084442138672, -0.8617019653320312, -50.825294494628906, 655.7597045898438, 713.5599365234375, 141.05552673339844, 393.90142822265625, 231.6541748046875, -35.75787353515625, -11.655838012695312, 1336.5654296875, 260.6585998535156, 582.8233032226562, 240.14474487304688, 464.60162353515625, 439.5422668457031, -3.6079978942871094, 71.11807250976562, 201.00186157226562, 445.06732177734375, 333.0810546875, 116.10449981689453, 508.74688720703125, 175.2768096923828, -57.47027587890625, 358.4345703125, -32.38462829589844, 155.1312255859375, 222.38211059570312, 226.88877868652344, 60.44775390625, 338.6082763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000298.npy"}
|
|
{"epoch": 0.43759177679882527, "step": 299, "batch_size": 64, "mean": 184.48007202148438, "std": 261.30841064453125, "min": -386.64422607421875, "p10": -125.35406265258787, "median": 156.04216766357422, "p90": 558.1940979003907, "max": 852.5237426757812, "pos_frac": 0.765625, "sample": [-134.94970703125, -253.7271728515625, -386.64422607421875, 393.8914489746094, 58.793670654296875, 36.5665283203125, 351.88287353515625, 131.58489990234375, 3.438882827758789, 130.34912109375, 637.7859497070312, 547.197509765625, 745.232421875, -102.96422576904297, 184.83375549316406, 740.9564819335938, -198.57049560546875, 333.7088928222656, 175.10287475585938, 237.9367218017578, -142.45440673828125, 309.9245300292969, 190.5107421875, 138.6407470703125, 447.1689147949219, 115.8995361328125, 852.5237426757812, 9.512531280517578, 229.13558959960938, -11.58005142211914, -6.693063735961914, 125.29246520996094, -95.7232666015625, 69.96604919433594, 208.97744750976562, 562.9069213867188, 136.2744598388672, 175.3262481689453, -15.504180908203125, 169.44619750976562, 99.4713363647461, 340.8248291015625, 304.90777587890625, -43.16452407836914, 177.5643310546875, -91.13227081298828, -222.47828674316406, 525.662109375, -5.8460693359375, 776.5595703125, -153.922607421875, 43.125282287597656, 288.8419494628906, 304.8602600097656, 245.15969848632812, 393.5196533203125, 82.310791015625, 636.5615844726562, 241.12094116210938, 142.6381378173828, 33.879119873046875, 334.9535217285156, 2.6152420043945312, 246.73406982421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000299.npy"}
|
|
{"epoch": 0.4390602055800294, "step": 300, "batch_size": 64, "mean": 223.01820373535156, "std": 282.8573303222656, "min": -369.0091857910156, "p10": -176.35675659179685, "median": 239.493408203125, "p90": 596.8988830566407, "max": 770.5997314453125, "pos_frac": 0.796875, "sample": [309.8787536621094, 304.92791748046875, 164.68206787109375, 336.8060302734375, 644.109130859375, -369.0091857910156, -159.6893310546875, 621.8292846679688, 419.822021484375, 40.70084762573242, -79.22994232177734, 305.73089599609375, 144.56590270996094, 561.8768310546875, 336.3076477050781, 396.18206787109375, 126.23883056640625, 507.981201171875, 39.639503479003906, 73.19393920898438, -30.572509765625, 228.36215209960938, 250.62466430664062, -29.322669982910156, 477.5173034667969, 409.59429931640625, -146.84536743164062, 85.73365783691406, 198.5152130126953, 312.70440673828125, 92.05158996582031, 730.7183837890625, 260.1745910644531, -340.6798400878906, 510.80010986328125, -297.246826171875, 588.7449340820312, 325.0780334472656, 481.9105224609375, 190.391845703125, 101.5246353149414, 433.01654052734375, 662.01416015625, 344.6693115234375, 40.39401626586914, 355.4223327636719, -52.2259407043457, -237.07135009765625, 726.498291015625, 414.8533630371094, 474.072509765625, -264.69256591796875, 293.1900939941406, -215.85104370117188, 40.5601692199707, 128.0408935546875, 498.9662170410156, 79.74832153320312, 600.3934326171875, 770.5997314453125, -183.49993896484375, 33.48414993286133, 198.22430419921875, 6.0349578857421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000300.npy"}
|
|
{"epoch": 0.44052863436123346, "step": 301, "batch_size": 64, "mean": 209.16920471191406, "std": 260.0733337402344, "min": -198.63282775878906, "p10": -75.1329978942871, "median": 149.82035064697266, "p90": 551.4416259765626, "max": 939.3267211914062, "pos_frac": 0.8125, "sample": [225.32191467285156, 301.1949462890625, -71.37323760986328, 351.64801025390625, 378.65081787109375, 300.72027587890625, 775.11669921875, 19.632675170898438, 471.6485595703125, -9.204450607299805, 852.514404296875, 369.94122314453125, 46.00995635986328, 265.02215576171875, -163.50404357910156, 157.4540252685547, 99.53382110595703, 651.7515258789062, 56.80940246582031, 353.0867919921875, 760.4154663085938, 47.6266975402832, -80.34210968017578, 68.40019226074219, 425.3974304199219, 15.64813232421875, 23.09026336669922, 48.34815979003906, 272.5283203125, 379.8998107910156, 266.290283203125, 297.4354248046875, 84.02590942382812, 179.22256469726562, 51.33464813232422, 76.74267578125, 214.21397399902344, -159.1791229248047, 11.326539993286133, -11.49298095703125, 55.60797119140625, 37.86188507080078, 225.41299438476562, -97.04451751708984, 44.14888000488281, 939.3267211914062, 328.9676208496094, 185.76272583007812, 724.4197387695312, 106.33139038085938, 515.5680541992188, 142.18667602539062, 343.72210693359375, 530.626708984375, 399.73834228515625, 67.06828308105469, -76.74432373046875, -198.63282775878906, 560.3623046875, -178.76144409179688, 105.96955871582031, -62.524871826171875, -21.10811996459961, 305.6560363769531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000301.npy"}
|
|
{"epoch": 0.4419970631424376, "step": 302, "batch_size": 64, "mean": 178.58872985839844, "std": 232.7600555419922, "min": -253.24072265625, "p10": -87.3170150756836, "median": 167.67567443847656, "p90": 494.82401733398456, "max": 842.2054443359375, "pos_frac": 0.765625, "sample": [33.67564392089844, 405.6905517578125, 719.3792114257812, -15.412715911865234, 191.33245849609375, -21.565397262573242, 215.4721221923828, 26.7279052734375, 191.14244079589844, 23.464385986328125, 216.64199829101562, 48.009613037109375, 80.21709442138672, 73.88541412353516, 169.901123046875, 105.1303939819336, 391.920166015625, -6.469539642333984, 447.4788818359375, 346.1546325683594, 209.03553771972656, 193.9251708984375, 173.1444091796875, 308.90252685546875, -86.99533081054688, -101.32030487060547, 367.9771728515625, 560.448974609375, 111.25765228271484, -233.4407501220703, 225.18283081054688, -81.3749771118164, 167.6682586669922, -19.365951538085938, -87.45487976074219, 521.8349609375, 842.2054443359375, 512.2789306640625, 175.8502960205078, -124.60888671875, -53.674468994140625, 421.8729248046875, -253.24072265625, 260.4464111328125, 454.09588623046875, 43.94243621826172, 128.8390655517578, 666.7507934570312, 167.68309020996094, 360.4976806640625, 59.845436096191406, 265.45849609375, -163.487548828125, 258.8773193359375, 87.48884582519531, 229.1629638671875, -61.95588684082031, 418.75634765625, 59.95713806152344, 645.51953125, -110.17947387695312, 71.10417175292969, 112.78827667236328, 81.20159912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000302.npy"}
|
|
{"epoch": 0.4434654919236417, "step": 303, "batch_size": 64, "mean": 160.39822387695312, "std": 242.84255981445312, "min": -437.1929626464844, "p10": -112.02570495605468, "median": 149.0877227783203, "p90": 534.3194091796876, "max": 644.4631958007812, "pos_frac": 0.703125, "sample": [301.60382080078125, -9.58304214477539, 50.16314697265625, 597.4494018554688, -99.19058227539062, 368.4919738769531, 503.01611328125, 636.6475830078125, -9.93679428100586, -112.98883056640625, 346.63134765625, 184.53530883789062, 330.32830810546875, -95.00999450683594, 71.64574432373047, 154.94537353515625, 70.52571105957031, -7.961393356323242, -60.98375701904297, 387.31658935546875, -437.1929626464844, -54.05900192260742, -84.6559066772461, 297.3996276855469, 438.9646301269531, 549.5357666015625, -158.08631896972656, 269.37908935546875, -241.80181884765625, 112.73985290527344, -109.77841186523438, 290.7158203125, 644.4631958007812, 224.93519592285156, 450.64111328125, 271.1927795410156, -167.946533203125, 160.6531982421875, 127.84626007080078, 624.0447998046875, 579.3077392578125, 31.69000244140625, 157.272705078125, 521.5833740234375, 139.26266479492188, 194.55807495117188, 256.9775390625, 322.16021728515625, 148.86331176757812, -27.71993064880371, 114.88082885742188, -54.71056365966797, 210.2848663330078, 36.05104446411133, 149.3121337890625, 539.7777099609375, -199.087158203125, -41.50431823730469, 26.872791290283203, 59.17935562133789, 261.52081298828125, 3.769256591796875, -175.4957275390625, 194.04327392578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000303.npy"}
|
|
{"epoch": 0.44493392070484583, "step": 304, "batch_size": 64, "mean": 180.16664123535156, "std": 169.39306640625, "min": -267.34478759765625, "p10": -10.012712097167967, "median": 168.5331802368164, "p90": 396.77074584960945, "max": 585.0170288085938, "pos_frac": 0.859375, "sample": [0.002166748046875, 260.1563720703125, 285.9625244140625, 76.73693084716797, -8.18695068359375, -10.795181274414062, -6.064300537109375, 49.21931457519531, 152.71548461914062, 135.82351684570312, 290.1548156738281, 369.03704833984375, 180.8660888671875, 327.39019775390625, 57.76957702636719, 268.9940185546875, 297.1437072753906, 330.51678466796875, -69.11731719970703, 15.994754791259766, 157.87081909179688, 377.145751953125, 35.57221984863281, 73.10684204101562, 297.3309631347656, 77.48048400878906, 285.59283447265625, 53.192474365234375, 319.3586730957031, 90.22885131835938, 124.97798156738281, 75.84872436523438, 585.0170288085938, 104.7092056274414, -56.459266662597656, -267.34478759765625, 230.85177612304688, -37.204742431640625, 43.41639709472656, 274.088623046875, -52.800445556640625, 460.46270751953125, 89.68266296386719, 42.82127380371094, 462.54327392578125, 160.08334350585938, 11.791481018066406, 71.76908874511719, 233.54949951171875, 344.99462890625, 234.88021850585938, -85.70750427246094, 176.98301696777344, 492.1534118652344, 407.00128173828125, 93.69132995605469, 244.20333862304688, 284.107177734375, 405.18145751953125, 521.9627075195312, 329.0472412109375, 229.98028564453125, 302.426025390625, 220.75709533691406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000304.npy"}
|
|
{"epoch": 0.44640234948604995, "step": 305, "batch_size": 64, "mean": 159.6190643310547, "std": 244.72914123535156, "min": -327.30889892578125, "p10": -101.19686279296874, "median": 100.47954559326172, "p90": 489.8157135009766, "max": 1082.4947509765625, "pos_frac": 0.765625, "sample": [396.3315124511719, 287.0711669921875, 102.18058776855469, 203.04891967773438, 159.1953887939453, 663.6962890625, 253.97555541992188, 34.14643859863281, 273.9232177734375, 526.3040771484375, 91.26869201660156, 81.34734344482422, 94.24410247802734, -102.26751708984375, -98.69866943359375, -0.5286331176757812, 286.70684814453125, 495.0993957519531, 48.83136749267578, 74.78203582763672, 1082.4947509765625, 321.18048095703125, 40.08285140991211, 283.38525390625, 289.4412841796875, -74.58441162109375, 7.930822372436523, 241.45213317871094, 224.59019470214844, -108.45767211914062, -43.24609375, 269.89447021484375, -44.42875671386719, 331.4030456542969, 98.77850341796875, 137.2489776611328, 592.500732421875, 301.73516845703125, -32.16923522949219, 157.99395751953125, 121.08623504638672, -42.10570526123047, 94.59675598144531, 96.63054656982422, 600.5863647460938, 554.28466796875, 232.89463806152344, -26.304046630859375, -278.93402099609375, 127.08087921142578, -324.296875, 477.48712158203125, 410.38134765625, 50.45055389404297, -107.82977294921875, 31.12091064453125, -123.73480987548828, 42.201560974121094, 264.6587829589844, -327.30889892578125, 62.392799377441406, 304.78466796875, 10.427413940429688, 17.18451690673828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000305.npy"}
|
|
{"epoch": 0.447870778267254, "step": 306, "batch_size": 64, "mean": 159.2891387939453, "std": 200.1483612060547, "min": -398.86669921875, "p10": -45.20945205688475, "median": 114.12085723876953, "p90": 416.66199645996096, "max": 876.305419921875, "pos_frac": 0.8125, "sample": [4.713855743408203, -1.6908683776855469, 93.14822387695312, 52.083831787109375, 23.63543701171875, -57.11427307128906, 347.2126159667969, 5.144523620605469, 98.84149169921875, -80.5817642211914, 226.45823669433594, 382.9200134277344, 187.08352661132812, 418.17279052734375, 61.66497802734375, 68.2263412475586, 10.375370025634766, 443.1620788574219, 109.17000579833984, 31.423355102539062, 119.07170867919922, -56.743492126464844, -23.676132202148438, 448.1488342285156, 268.8565673828125, 290.9148254394531, -20.02627944946289, -7.34144401550293, 413.1368103027344, 105.16789245605469, 141.13983154296875, 181.87680053710938, 244.3486328125, 426.137939453125, 34.63397216796875, 131.2550811767578, 79.0343017578125, -108.80579376220703, 18.157047271728516, 270.0487060546875, -398.86669921875, 337.81976318359375, 96.0148696899414, -32.67341613769531, 50.77461242675781, 275.330322265625, -50.58203887939453, 131.44830322265625, 553.5738525390625, 505.74444580078125, 196.75418090820312, 310.20501708984375, 211.25978088378906, -87.12755584716797, 390.7803955078125, 7.300792694091797, 81.96258544921875, 36.391319274902344, 294.62103271484375, 131.16061401367188, 876.305419921875, 195.97836303710938, 344.3143310546875, 356.6292724609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000306.npy"}
|
|
{"epoch": 0.44933920704845814, "step": 307, "batch_size": 64, "mean": 165.57266235351562, "std": 213.3192901611328, "min": -404.1173095703125, "p10": -89.22013397216797, "median": 117.20365905761719, "p90": 467.6151916503908, "max": 783.3717651367188, "pos_frac": 0.796875, "sample": [374.9013977050781, 148.85830688476562, 113.81035614013672, 324.48553466796875, 188.88092041015625, 144.9401397705078, -122.51904296875, 81.36328887939453, -81.03565979003906, 535.8094482421875, 155.902587890625, -26.35588836669922, 91.49226379394531, -131.87884521484375, 424.0733642578125, -404.1173095703125, 486.8730163574219, -171.8448944091797, 225.85507202148438, -67.50582122802734, 207.44378662109375, 288.294189453125, 89.47737884521484, 160.86439514160156, 483.42816162109375, 237.75265502929688, 37.27705001831055, 115.8406982421875, 60.262325286865234, 549.320068359375, 96.30686950683594, 487.8838806152344, 79.60733795166016, 105.44419860839844, -86.35981750488281, 118.56661987304688, -90.44598388671875, 37.89207458496094, 430.71826171875, 368.71441650390625, -21.093284606933594, 347.01611328125, 59.487754821777344, 333.8163146972656, 202.3826446533203, 31.057662963867188, 405.15789794921875, 261.81658935546875, 18.622156143188477, 102.48265075683594, 239.16893005371094, 15.094669342041016, 358.2056884765625, 566.0908813476562, 279.0421447753906, 783.3717651367188, 92.92736053466797, -106.55653381347656, 199.9869842529297, 109.88189697265625, -20.567642211914062, 315.8348693847656, 70.56526184082031, -117.41931915283203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000307.npy"}
|
|
{"epoch": 0.45080763582966227, "step": 308, "batch_size": 64, "mean": 187.2371368408203, "std": 191.56558227539062, "min": -234.85244750976562, "p10": -41.93237667083739, "median": 182.48423767089844, "p90": 464.8689178466797, "max": 567.525390625, "pos_frac": 0.8125, "sample": [165.91908264160156, 439.4415283203125, -234.85244750976562, 252.56875610351562, 31.6697998046875, 493.069091796875, -151.35137939453125, 10.830814361572266, 36.0462646484375, 89.59571075439453, 455.74481201171875, -8.458858489990234, 225.99472045898438, 101.28701782226562, 364.60565185546875, 488.8844909667969, -125.71182250976562, 175.59288024902344, 188.20489501953125, 18.790870666503906, 255.8253631591797, 499.9164733886719, 277.3861083984375, 468.15960693359375, -103.613037109375, 269.503173828125, 154.56039428710938, 136.56016540527344, 129.00758361816406, -47.20085144042969, 261.72998046875, 131.09759521484375, 185.31454467773438, 230.12425231933594, 282.4542236328125, 457.1906433105469, 484.94720458984375, 201.09735107421875, -170.51271057128906, 60.90618896484375, 96.95584106445312, 349.41839599609375, 312.9098205566406, 168.53109741210938, 163.9851531982422, 567.525390625, 237.60916137695312, 49.07765197753906, -10.471778869628906, 363.16119384765625, 521.0478515625, 276.94366455078125, 179.6539306640625, -14.24378776550293, -29.63926887512207, 66.28207397460938, 346.1310119628906, 319.935791015625, 225.0388641357422, -11.884849548339844, 394.59686279296875, 317.58514404296875, -112.74131774902344, 23.442527770996094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000308.npy"}
|
|
{"epoch": 0.4522760646108664, "step": 309, "batch_size": 64, "mean": 186.04051208496094, "std": 229.60084533691406, "min": -632.3908081054688, "p10": -88.00826416015624, "median": 167.0595245361328, "p90": 471.99248962402345, "max": 709.281005859375, "pos_frac": 0.796875, "sample": [159.04705810546875, 68.68255615234375, 114.98577117919922, -102.27011108398438, 320.14306640625, 167.001953125, 126.65984344482422, -17.303739547729492, 530.8104858398438, 52.5113525390625, 249.12322998046875, 432.499267578125, 113.70446014404297, 506.9980163574219, 65.95020294189453, 51.063575744628906, -88.10612487792969, 290.55999755859375, 482.5101623535156, 349.9544372558594, 260.28582763671875, -632.3908081054688, 243.39633178710938, -117.59999084472656, 71.89022827148438, 170.37222290039062, 261.4957275390625, 325.63433837890625, 33.05860900878906, 69.84217834472656, -153.97906494140625, 642.3009033203125, 31.3876953125, -3.4001617431640625, 131.35830688476562, 423.1337890625, 709.281005859375, 464.2544860839844, 137.05990600585938, -22.467849731445312, -8.396419525146484, 85.80867767333984, 64.51121520996094, 428.8436584472656, 313.41107177734375, 282.0113830566406, 475.30877685546875, 393.09027099609375, 319.84722900390625, -4.920627593994141, 438.5065002441406, -173.0512237548828, 226.75411987304688, 315.7897033691406, -115.23143005371094, 70.23663330078125, 554.851318359375, 416.7225646972656, -87.77992248535156, 167.11709594726562, 174.07421875, 27.890666961669922, 396.3174133300781, 225.4409942626953], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000309.npy"}
|
|
{"epoch": 0.45374449339207046, "step": 310, "batch_size": 64, "mean": 177.672607421875, "std": 192.5367431640625, "min": -266.35772705078125, "p10": -47.50640106201172, "median": 168.41859436035156, "p90": 420.31712646484385, "max": 610.8153686523438, "pos_frac": 0.828125, "sample": [-46.22079086303711, 169.75161743164062, 400.75848388671875, -72.11394500732422, 118.18818664550781, 338.3069152832031, 55.8923225402832, 1.2869510650634766, 27.32200813293457, 219.89016723632812, -209.45318603515625, 398.946533203125, 460.8541564941406, -48.057376861572266, 63.735755920410156, 298.3170166015625, 214.4145965576172, 250.09861755371094, 88.27444458007812, 155.2415771484375, -16.683826446533203, 105.311767578125, 321.51470947265625, 64.318359375, 178.47650146484375, 559.6090698242188, 331.4693603515625, 8.365543365478516, 35.57502746582031, 278.638916015625, -266.35772705078125, 610.8153686523438, -34.60200881958008, 150.716552734375, 602.5712890625, 194.89593505859375, 267.3247375488281, -28.983335494995117, -53.798255920410156, 26.018699645996094, 73.54251098632812, 167.0855712890625, 337.2257080078125, 258.204833984375, 60.12512969970703, 277.4456787109375, 428.69940185546875, -96.52222442626953, 351.29779052734375, 271.5313415527344, 324.6506652832031, -85.04895782470703, 327.52252197265625, 132.82371520996094, 55.49894714355469, 59.86420440673828, 71.22967529296875, 495.77978515625, 542.0916748046875, 251.66600036621094, 55.91794204711914, 364.7665710449219, 252.86119079589844, 172.15603637695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000310.npy"}
|
|
{"epoch": 0.4552129221732746, "step": 311, "batch_size": 64, "mean": 201.23855590820312, "std": 260.6631164550781, "min": -271.769775390625, "p10": -114.54959411621091, "median": 163.13463592529297, "p90": 591.0501831054688, "max": 837.2427978515625, "pos_frac": 0.703125, "sample": [475.37969970703125, 413.10296630859375, 419.28424072265625, -1.8151168823242188, -160.090087890625, 570.8736572265625, -136.40371704101562, 323.4921875, 166.74130249023438, 599.697265625, 280.58526611328125, 79.94451904296875, 373.7470703125, 797.5931396484375, 321.91357421875, 133.975341796875, 472.4888916015625, 148.4852294921875, -32.906089782714844, 668.142822265625, 174.6905517578125, 253.43585205078125, 528.3192138671875, 234.05653381347656, 41.529197692871094, 173.2401885986328, -29.569564819335938, -184.480224609375, 247.43450927734375, 139.56109619140625, -91.86148071289062, 315.5302429199219, -176.89773559570312, 142.0481719970703, 269.20233154296875, -124.2730712890625, -6.120323181152344, 837.2427978515625, -22.473876953125, 286.5766296386719, 33.33484649658203, 76.73816680908203, 246.68826293945312, 28.89643669128418, 629.5235595703125, 366.7737121582031, 34.79463577270508, -66.81954956054688, -8.51412582397461, 99.1361083984375, -22.665851593017578, 495.7056884765625, 147.68768310546875, -1.6280288696289062, -271.769775390625, 462.394775390625, 159.52796936035156, 222.24169921875, -13.364967346191406, 269.55084228515625, 634.5780029296875, -197.62278747558594, -46.25495147705078, 678.91162109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000311.npy"}
|
|
{"epoch": 0.4566813509544787, "step": 312, "batch_size": 64, "mean": 178.09536743164062, "std": 256.8215026855469, "min": -368.0985412597656, "p10": -82.35527420043945, "median": 127.24059295654297, "p90": 493.1836730957033, "max": 1049.793212890625, "pos_frac": 0.8125, "sample": [135.12644958496094, 1049.793212890625, 533.6088256835938, 159.87545776367188, 110.04148864746094, 57.73999786376953, -79.82242584228516, 116.93038940429688, 76.49994659423828, 365.31219482421875, -239.81298828125, 425.50433349609375, 928.4793701171875, 284.67401123046875, 252.339111328125, 316.8148498535156, -236.3402099609375, 239.12896728515625, 397.19012451171875, 169.006103515625, 561.0360107421875, 91.70191955566406, 115.83285522460938, 285.68475341796875, 66.32199096679688, 7.52836799621582, 34.872642517089844, 696.80859375, 342.1078186035156, 447.7059326171875, 119.354736328125, 407.8646545410156, 173.47402954101562, 99.91069030761719, -172.8121337890625, 112.02076721191406, 176.91233825683594, 512.6741333007812, -6.2606658935546875, 27.322673797607422, 295.1807556152344, 118.63619232177734, -58.588958740234375, 252.41847229003906, 172.1576385498047, 1.08929443359375, -36.19134521484375, 665.264404296875, -83.44078063964844, 20.9207820892334, 214.96360778808594, 105.08311462402344, -209.608154296875, -10.8682861328125, -118.74031829833984, 148.17750549316406, 153.42724609375, 8.988113403320312, -368.0985412597656, 293.9274597167969, 258.5928649902344, 16.510818481445312, 60.14534378051758, 336.0050964355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000312.npy"}
|
|
{"epoch": 0.4581497797356828, "step": 313, "batch_size": 64, "mean": 192.1097869873047, "std": 263.3614196777344, "min": -493.29608154296875, "p10": -96.71422271728512, "median": 172.13155364990234, "p90": 505.5260131835938, "max": 849.9840087890625, "pos_frac": 0.78125, "sample": [89.89529418945312, 329.02783203125, 106.0545654296875, 333.72894287109375, -161.20147705078125, -4.4529571533203125, 509.9759521484375, 173.69903564453125, 394.4256896972656, -47.923641204833984, 39.87709045410156, -150.42201232910156, 481.84649658203125, 185.66123962402344, 21.013025283813477, 170.56407165527344, 432.5274963378906, 232.7852325439453, 97.8336181640625, 213.63916015625, 103.81037139892578, 338.8435363769531, -493.29608154296875, -110.33515930175781, 213.36953735351562, 556.3234252929688, 280.56494140625, 100.04426574707031, 238.94247436523438, 849.9840087890625, 333.9068603515625, 129.3687744140625, -38.05474853515625, 164.33128356933594, -253.62307739257812, 168.3539276123047, -64.93203735351562, -470.64361572265625, 430.8307800292969, 249.3370819091797, -15.004966735839844, 258.851806640625, 56.78189468383789, 320.0040283203125, 300.8426818847656, 125.25492095947266, -18.044456481933594, 337.9637451171875, 318.26898193359375, -173.66714477539062, 495.142822265625, 88.0589599609375, 145.27325439453125, -0.325225830078125, 244.50698852539062, 629.1770629882812, 603.7904052734375, 14.551956176757812, 21.628990173339844, 78.83851623535156, 290.6239318847656, 405.13482666015625, 762.4763793945312, 829.2149047851562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000313.npy"}
|
|
{"epoch": 0.45961820851688695, "step": 314, "batch_size": 64, "mean": 175.31007385253906, "std": 274.52978515625, "min": -365.04473876953125, "p10": -163.6714874267578, "median": 128.30367279052734, "p90": 482.7947448730469, "max": 865.8029174804688, "pos_frac": 0.71875, "sample": [754.6998291015625, -295.7261047363281, 99.8455581665039, 54.18810272216797, 44.013519287109375, 246.99954223632812, 71.41178894042969, 325.26025390625, 78.8023681640625, -128.1767120361328, 373.55810546875, 3.9533538818359375, 266.2462158203125, 111.60636901855469, 181.77200317382812, -365.04473876953125, -175.82737731933594, 28.023483276367188, 702.3209838867188, 750.2601928710938, 30.431442260742188, -44.35742950439453, -164.0716094970703, 23.843990325927734, 175.73809814453125, -150.11851501464844, -0.1976165771484375, -162.7378692626953, -14.489952087402344, 427.6994323730469, 462.726806640625, 34.89788818359375, 287.6722717285156, 468.96038818359375, 310.1611022949219, 169.23976135253906, -108.1357650756836, 145.0009765625, 36.1287841796875, 780.967041015625, 437.4858093261719, 389.7607727050781, 865.8029174804688, 488.7237548828125, -95.64622497558594, 346.42083740234375, -177.25961303710938, 458.360107421875, -25.105178833007812, 231.68621826171875, 332.50164794921875, 46.436309814453125, 316.5045166015625, 376.9803161621094, 457.1431579589844, 498.96063232421875, 94.70881652832031, 231.04931640625, 233.30149841308594, -4.521980285644531, -190.5017547607422, -42.11891555786133, 316.53271484375, -204.9071502685547], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000314.npy"}
|
|
{"epoch": 0.461086637298091, "step": 315, "batch_size": 64, "mean": 165.16049194335938, "std": 214.72100830078125, "min": -178.2917022705078, "p10": -76.56481475830077, "median": 135.1613006591797, "p90": 496.6761871337891, "max": 717.8801879882812, "pos_frac": 0.75, "sample": [565.0637817382812, -54.34320831298828, 297.02252197265625, 20.2491455078125, -53.656002044677734, 71.91229248046875, 58.92829132080078, 156.20265197753906, 270.63201904296875, -25.679611206054688, 302.41778564453125, 39.099609375, -89.85054016113281, 258.6981201171875, 85.50041961669922, 545.1181640625, 371.8363037109375, 160.3770294189453, -138.33749389648438, 365.599609375, 201.5020751953125, 500.5302734375, 549.1675415039062, 33.940399169921875, -126.86419677734375, 176.71630859375, 244.2054443359375, 186.0092315673828, 398.5129699707031, 136.6848907470703, 461.52117919921875, 133.63771057128906, 84.3916015625, 117.56951904296875, 207.61167907714844, 331.9203186035156, -113.73190307617188, -7.973480224609375, 231.20152282714844, 23.116836547851562, 42.406314849853516, -65.83441162109375, 717.8801879882812, 70.86103057861328, 214.50674438476562, -10.040885925292969, -157.19070434570312, 26.03465461730957, -81.16355895996094, 600.5821533203125, 108.76791381835938, 231.9250946044922, 636.9807739257812, 139.65530395507812, 166.7551727294922, -54.05499267578125, 290.9178161621094, -8.13979721069336, 391.4857482910156, 487.6833190917969, -178.2917022705078, -55.32440185546875, 61.73282241821289, 15.676475524902344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000315.npy"}
|
|
{"epoch": 0.46255506607929514, "step": 316, "batch_size": 64, "mean": 228.41299438476562, "std": 252.40106201171875, "min": -258.26971435546875, "p10": -15.018904495239251, "median": 161.75245666503906, "p90": 596.9120544433595, "max": 931.166259765625, "pos_frac": 0.875, "sample": [319.017578125, 123.31224060058594, 487.00872802734375, 66.88461303710938, 56.10234832763672, 931.166259765625, 175.7015380859375, 702.1829223632812, 453.4404296875, 638.3446655273438, 379.5655212402344, 326.30474853515625, 32.921142578125, 22.21835708618164, 35.847320556640625, 211.47731018066406, 61.4534797668457, 128.48086547851562, 9.843807220458984, 425.43707275390625, 162.08657836914062, 25.63309097290039, 605.9864501953125, 414.2139892578125, 270.9692687988281, 112.2491683959961, 56.63968276977539, 669.0108642578125, 143.5251007080078, 506.99505615234375, 67.83435821533203, 161.4183349609375, 48.828643798828125, 160.44578552246094, 238.76834106445312, 313.190185546875, 385.17584228515625, 55.51581573486328, 294.0257568359375, 20.69147491455078, 575.7384643554688, 318.4387512207031, 39.34271240234375, 398.5307922363281, 191.0259552001953, 145.27392578125, -258.26971435546875, -17.559803009033203, 912.9297485351562, 152.56602478027344, 801.858154296875, -252.06036376953125, 180.83377075195312, 195.6562957763672, 384.2579345703125, -9.090141296386719, -79.40364837646484, -43.67597961425781, -32.031349182128906, 122.1115951538086, 249.29571533203125, -93.08699798583984, 98.04360961914062, 337.79083251953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000316.npy"}
|
|
{"epoch": 0.46402349486049926, "step": 317, "batch_size": 64, "mean": 247.00172424316406, "std": 235.42642211914062, "min": -162.41775512695312, "p10": -39.786382293701166, "median": 241.03556060791016, "p90": 568.2134948730469, "max": 712.004638671875, "pos_frac": 0.8125, "sample": [362.11395263671875, 2.1815567016601562, 108.0496597290039, 480.9471130371094, 469.0879821777344, 78.66549682617188, 447.21173095703125, 381.84771728515625, -162.41775512695312, 569.1392211914062, 711.2236938476562, 130.04258728027344, 260.39276123046875, 1.23260498046875, 152.91885375976562, 400.356689453125, 182.30613708496094, -36.503257751464844, 107.53003692626953, 402.3770446777344, -26.298095703125, 344.7557373046875, 373.3273620605469, 489.73486328125, 17.431102752685547, 529.58203125, -21.52480697631836, 111.2323226928711, 712.004638671875, 432.6451416015625, 644.943115234375, 447.35467529296875, 149.3986358642578, 566.053466796875, 436.16314697265625, -54.69660949707031, 269.4302978515625, 95.08912658691406, 270.0880126953125, 90.77484130859375, 186.04229736328125, 533.6196899414062, -22.087133407592773, -41.19343566894531, 654.7380981445312, 228.29818725585938, 591.5205078125, 323.51470947265625, 265.5857849121094, -78.74019622802734, 129.22850036621094, 96.63794708251953, 253.77293395996094, -20.482383728027344, -91.55780792236328, 122.42313385009766, 95.5914306640625, -114.07320404052734, 650.2186279296875, 406.93603515625, 74.93988800048828, 381.2688903808594, -126.81300354003906, 382.5273742675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000317.npy"}
|
|
{"epoch": 0.4654919236417034, "step": 318, "batch_size": 64, "mean": 184.75576782226562, "std": 234.08702087402344, "min": -328.91656494140625, "p10": -72.07937698364258, "median": 156.11519622802734, "p90": 500.5004882812501, "max": 840.3966064453125, "pos_frac": 0.796875, "sample": [70.6219253540039, -61.51887512207031, 840.3966064453125, 86.99383544921875, 257.22930908203125, 101.07691955566406, -68.67052459716797, 191.81573486328125, 128.89639282226562, 653.1346435546875, 27.183618545532227, 109.92657470703125, 301.27044677734375, 515.3358764648438, 601.8093872070312, 257.6478271484375, -12.591278076171875, 127.15040588378906, -81.29375457763672, -16.96439552307129, 567.5418090820312, 64.1970443725586, -131.2781524658203, 465.88458251953125, 794.925537109375, 312.60870361328125, -61.555477142333984, -88.95869445800781, 235.8647003173828, 372.9974670410156, 78.93307495117188, 333.0316467285156, 74.09186553955078, 415.7851867675781, 223.0786590576172, 168.40048217773438, 552.8175659179688, 9.885231018066406, -39.996124267578125, 444.3735046386719, 431.51629638671875, 211.88272094726562, 8.001758575439453, -84.4896011352539, 305.95391845703125, 216.41244506835938, 87.0779037475586, 180.38864135742188, 143.8299102783203, -73.54031372070312, -328.91656494140625, 36.08466339111328, 247.94891357421875, 178.27972412109375, 360.76837158203125, 350.795654296875, 236.34588623046875, 139.26910400390625, 57.49781799316406, 254.54415893554688, 128.13916015625, 207.11541748046875, -327.36859130859375, 34.75157928466797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000318.npy"}
|
|
{"epoch": 0.4669603524229075, "step": 319, "batch_size": 64, "mean": 222.0023193359375, "std": 252.32803344726562, "min": -372.4964599609375, "p10": -120.6470245361328, "median": 225.4190216064453, "p90": 543.6395324707032, "max": 1003.0226440429688, "pos_frac": 0.828125, "sample": [317.44598388671875, 162.54408264160156, 309.1714782714844, 251.89920043945312, 78.02354431152344, 246.0322265625, 368.1780090332031, 522.8788452148438, 238.39877319335938, 89.1339340209961, -98.13455200195312, -251.83309936523438, 320.1193542480469, 110.69166564941406, 297.432861328125, 137.37876892089844, 56.002296447753906, 256.07806396484375, 250.86151123046875, 196.61795043945312, 185.59056091308594, 408.94049072265625, 193.23056030273438, 252.02017211914062, 316.4429931640625, 54.7176513671875, 389.6347351074219, 803.96337890625, 455.24188232421875, 374.40447998046875, -127.58193969726562, 195.62278747558594, 175.9086456298828, 528.1675415039062, -282.99420166015625, 390.00299072265625, -372.4964599609375, -115.81442260742188, -122.7181396484375, 396.2406005859375, 552.1956787109375, 453.2453918457031, 574.0309448242188, 6.678865432739258, 169.65586853027344, 18.887161254882812, 551.0017700195312, 593.5833740234375, 261.530029296875, 550.2703857421875, 1003.0226440429688, -163.7360076904297, 158.21231079101562, 433.263671875, -21.795001983642578, 38.394134521484375, -160.39016723632812, -33.45353698730469, 338.854736328125, 166.55287170410156, 144.04803466796875, 212.43927001953125, 248.04953002929688, 156.16188049316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000319.npy"}
|
|
{"epoch": 0.4684287812041116, "step": 320, "batch_size": 64, "mean": 243.22470092773438, "std": 261.526123046875, "min": -242.49224853515625, "p10": -69.13655853271484, "median": 231.1150131225586, "p90": 572.1887939453126, "max": 959.8372802734375, "pos_frac": 0.796875, "sample": [140.54794311523438, 185.04656982421875, 946.4464111328125, 124.19676208496094, 281.4066162109375, -120.8584213256836, 190.72756958007812, -242.49224853515625, 816.8746948242188, 241.2180633544922, 421.1827392578125, 376.7364807128906, -43.41363525390625, 376.7469787597656, 220.1282958984375, 549.820068359375, 609.3224487304688, 402.2926330566406, 372.8825988769531, -192.24273681640625, 370.1266174316406, 373.83203125, -71.53995513916016, 227.2562255859375, 79.853515625, 10.971672058105469, 276.4117431640625, 287.0079345703125, -46.89988708496094, 581.775390625, -35.26011657714844, 242.41668701171875, -63.52863311767578, 142.97879028320312, 469.79656982421875, 97.23793029785156, 168.564697265625, 209.73924255371094, 959.8372802734375, 400.0166320800781, 398.7862243652344, 118.82715606689453, 128.52110290527344, -57.3582763671875, 660.1788330078125, 60.747337341308594, 399.3731994628906, 211.97474670410156, -2.879331588745117, 27.041976928710938, 387.4904479980469, 376.66412353515625, 294.9906005859375, 435.53887939453125, 484.84405517578125, 606.41357421875, 529.1119995117188, 2.8395214080810547, -115.04898071289062, -74.96639251708984, 49.044281005859375, 234.9738006591797, 241.93170166015625, -169.8238525390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000320.npy"}
|
|
{"epoch": 0.4698972099853157, "step": 321, "batch_size": 64, "mean": 208.4459228515625, "std": 271.16741943359375, "min": -364.32830810546875, "p10": -91.82781677246092, "median": 182.05783081054688, "p90": 567.5824707031251, "max": 1031.8394775390625, "pos_frac": 0.78125, "sample": [683.5243530273438, 130.28323364257812, 296.79595947265625, 245.33663940429688, -45.978431701660156, 275.1352844238281, 361.5617370605469, 331.5809020996094, 177.012451171875, 480.366943359375, 419.4529724121094, 366.17755126953125, 63.791595458984375, 51.36967086791992, -242.59349060058594, 241.87913513183594, 433.9005126953125, 268.0977783203125, 238.39089965820312, 572.7130126953125, 759.943359375, 158.32843017578125, -364.32830810546875, 292.0887756347656, 215.8026580810547, 303.8985900878906, 764.24169921875, -269.4001159667969, -109.83457946777344, 141.97122192382812, 54.61427307128906, 555.6112060546875, -33.92303466796875, 379.5008544921875, 135.80865478515625, 407.1680603027344, 232.8457489013672, -288.66900634765625, 1031.8394775390625, 91.44031524658203, -27.900146484375, 97.65840911865234, 17.400476455688477, 104.80989074707031, -54.566627502441406, 155.69277954101562, -99.6160888671875, -47.9938850402832, 182.63693237304688, 268.4227294921875, 688.6055908203125, 676.6812744140625, -14.295360565185547, -242.54425048828125, 174.40472412109375, 181.47872924804688, 169.367431640625, 148.84352111816406, 1.7991657257080078, 479.78509521484375, 217.8841552734375, 278.5042724609375, 249.3885498046875, -73.65518188476562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000321.npy"}
|
|
{"epoch": 0.4713656387665198, "step": 322, "batch_size": 64, "mean": 223.640869140625, "std": 234.4007110595703, "min": -300.8452453613281, "p10": -43.752029418945305, "median": 245.9314727783203, "p90": 520.2057189941406, "max": 801.2529296875, "pos_frac": 0.765625, "sample": [-196.26565551757812, 734.0897216796875, 308.6898498535156, 801.2529296875, 121.62752532958984, -28.149906158447266, 507.8489990234375, 362.1382141113281, 322.8818054199219, -62.35226058959961, -47.41703796386719, -5.565788269042969, 349.20440673828125, 272.398681640625, 297.7933044433594, 349.8720703125, -28.416099548339844, 238.55545043945312, -20.93328094482422, -101.59967041015625, 559.084228515625, 525.4041748046875, 212.40322875976562, 580.6292114257812, -46.413330078125, 217.5767822265625, 361.3937072753906, 533.5556640625, 18.600440979003906, 508.07598876953125, -37.542327880859375, 179.5604248046875, -11.598583221435547, 380.51025390625, -4.145355224609375, 16.76729965209961, 296.81231689453125, 195.99400329589844, 2.6206703186035156, 334.42523193359375, 444.27362060546875, 469.3653564453125, 422.480224609375, -103.7612533569336, 441.63677978515625, 108.94096374511719, 289.90191650390625, -14.043193817138672, 253.3074951171875, 7.9140472412109375, 115.63092803955078, 667.9110717773438, 196.85879516601562, 325.0265808105469, 300.9790954589844, 339.24627685546875, 86.1214599609375, 69.40467071533203, 57.29505920410156, -300.8452453613281, 39.818084716796875, 373.7205810546875, 462.2416687011719, 260.2227783203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000322.npy"}
|
|
{"epoch": 0.47283406754772395, "step": 323, "batch_size": 64, "mean": 197.70355224609375, "std": 248.60272216796875, "min": -412.65435791015625, "p10": -102.58516387939451, "median": 189.89761352539062, "p90": 533.8467712402344, "max": 798.150390625, "pos_frac": 0.8125, "sample": [390.3080749511719, 252.62887573242188, 567.6737670898438, -412.65435791015625, 618.8944091796875, -305.2900085449219, 573.1348266601562, 798.150390625, 30.422962188720703, 148.87115478515625, 255.08486938476562, 46.092559814453125, 187.77041625976562, 140.89698791503906, 15.262947082519531, 336.1768798828125, 93.95673370361328, 306.9682922363281, -120.55484771728516, 650.3746948242188, -10.514106750488281, 77.77714538574219, 508.07049560546875, -109.3923568725586, -111.63575744628906, 390.3366394042969, 159.57945251464844, 141.90184020996094, 408.9393310546875, 30.41265869140625, 537.19384765625, 287.52490234375, 497.080078125, -9.657072067260742, 311.96136474609375, 22.462692260742188, -2.3662261962890625, 193.57064819335938, 305.8076171875, 229.08755493164062, 302.8504638671875, 29.1298828125, 255.65771484375, 526.0369262695312, -307.8113708496094, 182.7241973876953, 308.49072265625, 657.2833862304688, 392.3838806152344, -56.784263610839844, 228.91714477539062, 508.42169189453125, -86.70171356201172, 352.736572265625, -219.81346130371094, 192.02481079101562, 97.5262451171875, 242.6458740234375, 102.00582122802734, 277.4289245605469, 24.25035858154297, 24.075069427490234, 132.79946899414062, 54.4381103515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000323.npy"}
|
|
{"epoch": 0.47430249632892807, "step": 324, "batch_size": 64, "mean": 188.0611572265625, "std": 287.52703857421875, "min": -382.5910949707031, "p10": -135.33743438720703, "median": 151.4688720703125, "p90": 477.1989440917969, "max": 1001.303955078125, "pos_frac": 0.75, "sample": [-232.43783569335938, 687.1480102539062, -12.39404296875, 210.97265625, 14.282918930053711, 93.81365966796875, 219.91864013671875, 279.03839111328125, 143.5800018310547, 246.91806030273438, 851.4859619140625, -42.962738037109375, 156.75296020507812, 105.93450927734375, -17.506488800048828, 691.5594482421875, -235.55650329589844, -377.360595703125, 346.245361328125, 314.1640319824219, 25.046157836914062, 146.18478393554688, -167.55328369140625, 234.91976928710938, 336.85467529296875, 920.7937622070312, 479.3148193359375, -135.80857849121094, 117.08908081054688, 417.2204284667969, 403.9119567871094, 160.2746124267578, 83.06926727294922, 113.1009292602539, 330.9041442871094, 315.575927734375, 143.85816955566406, 317.5897216796875, 235.18637084960938, -3.184671401977539, 457.85247802734375, -347.24407958984375, -134.23809814453125, 363.4209899902344, 472.26190185546875, 387.88250732421875, 67.31857299804688, -382.5910949707031, -62.876556396484375, 110.21686553955078, 282.03973388671875, 86.86492156982422, 267.49896240234375, -65.60354614257812, 115.49974060058594, 25.863431930541992, 417.0491027832031, -108.4847640991211, 2.5740623474121094, 1001.303955078125, 697.8776245117188, 158.07130432128906, -30.764785766601562, 336.1758117675781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000324.npy"}
|
|
{"epoch": 0.47577092511013214, "step": 325, "batch_size": 64, "mean": 157.7469482421875, "std": 221.32850646972656, "min": -355.9023132324219, "p10": -77.53672866821287, "median": 143.03072357177734, "p90": 452.8746917724612, "max": 846.580810546875, "pos_frac": 0.8125, "sample": [-355.0663757324219, -355.9023132324219, 196.97659301757812, -8.963981628417969, 355.0848083496094, 250.85089111328125, 145.7379150390625, -43.52873229980469, 162.92892456054688, 262.8377685546875, 291.1019287109375, 488.97589111328125, 265.34259033203125, 163.8310546875, 156.6782684326172, 132.99119567871094, 9.706916809082031, 74.72053527832031, 48.59104919433594, 237.52450561523438, -118.80028533935547, 236.63812255859375, 283.8506774902344, 537.556884765625, 23.954303741455078, 28.005882263183594, 140.3235321044922, 115.81926727294922, 118.99891662597656, 17.642478942871094, 527.470947265625, -55.43943786621094, 846.580810546875, -12.03280258178711, 539.214599609375, 16.60198211669922, 480.63720703125, -152.0837860107422, 264.9840393066406, 225.3777618408203, -87.00699615478516, 129.88262939453125, 388.0954895019531, -16.462587356567383, 305.36895751953125, 295.7949523925781, 179.00100708007812, 289.24737548828125, 138.26492309570312, 152.60140991210938, 296.33074951171875, -307.765869140625, 23.231903076171875, 672.67138671875, 27.67169189453125, 26.744300842285156, 386.3934020996094, 87.49491119384766, -118.01974487304688, 247.26104736328125, 134.73731994628906, 34.6306037902832, 60.53227996826172, 203.38272094726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000325.npy"}
|
|
{"epoch": 0.47723935389133626, "step": 326, "batch_size": 64, "mean": 145.47195434570312, "std": 194.49720764160156, "min": -387.2820739746094, "p10": -41.964133834838854, "median": 145.63098907470703, "p90": 385.21406860351567, "max": 623.6028442382812, "pos_frac": 0.796875, "sample": [-12.1405029296875, -25.602413177490234, 52.58009338378906, 388.82049560546875, 269.6610107421875, -10.964324951171875, 152.52899169921875, 149.76182556152344, 293.5530700683594, -244.75784301757812, -181.61700439453125, -387.2820739746094, 584.7457885742188, 52.090476989746094, -143.7689666748047, 141.50015258789062, 217.05152893066406, 118.7843017578125, 308.296142578125, 234.08489990234375, -7.514434814453125, 23.476470947265625, 156.5685577392578, 208.27944946289062, -26.689773559570312, 161.5740966796875, 608.796142578125, 196.1216583251953, 96.54367065429688, 97.71868896484375, 388.1070251464844, 204.06838989257812, 229.7108917236328, 121.86083984375, 208.0863037109375, 2.8045654296875, 51.767608642578125, 406.1544189453125, -48.51028823852539, 67.90475463867188, 118.63258361816406, -61.2559814453125, 81.8892822265625, 165.82794189453125, 60.701568603515625, 98.51988220214844, 157.44097900390625, 341.0638732910156, 238.86329650878906, 223.9142303466797, -0.5971527099609375, 378.4638366699219, 623.6028442382812, 105.2051010131836, 336.40966796875, 156.7845458984375, 368.95526123046875, 226.7254638671875, 13.795888900756836, -320.63916015625, 424.658935546875, 313.52227783203125, 78.46275329589844, 75.10216522216797], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000326.npy"}
|
|
{"epoch": 0.4787077826725404, "step": 327, "batch_size": 64, "mean": 222.72535705566406, "std": 221.71058654785156, "min": -248.32630920410156, "p10": -94.48186569213867, "median": 248.22679901123047, "p90": 471.01643676757817, "max": 760.8448486328125, "pos_frac": 0.828125, "sample": [215.03982543945312, 175.8917694091797, 410.155029296875, 208.39999389648438, 262.73828125, 319.468994140625, 97.17752838134766, 253.7537841796875, 319.779541015625, 227.69451904296875, 323.2920837402344, 314.94024658203125, 117.84608459472656, 323.32891845703125, -87.54529571533203, 419.6227722167969, -45.63576889038086, 426.8402404785156, 344.2334899902344, 294.6495361328125, -209.93377685546875, 520.11474609375, 466.2928466796875, 287.708251953125, 149.4389190673828, 473.04083251953125, 301.75439453125, -179.8163604736328, 190.5887908935547, 266.5656433105469, 333.40570068359375, -104.04400634765625, -27.25640869140625, 177.4181671142578, 312.77410888671875, 168.68800354003906, 242.69981384277344, 263.2388916015625, 275.5964660644531, 161.99374389648438, 52.56349182128906, 82.60675048828125, -121.54116821289062, 453.53436279296875, 208.54580688476562, -97.45468139648438, 3.374286651611328, 413.81842041015625, 366.5394287109375, 143.41690063476562, 721.4030151367188, 73.59019470214844, 760.8448486328125, -248.32630920410156, 473.5534973144531, -52.968505859375, 85.10542297363281, 571.48046875, 94.07856750488281, 367.8140563964844, 413.3249206542969, 647.8685302734375, 86.41010284423828, -237.10025024414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000327.npy"}
|
|
{"epoch": 0.4801762114537445, "step": 328, "batch_size": 64, "mean": 209.12203979492188, "std": 258.68145751953125, "min": -446.1654968261719, "p10": -61.02054748535156, "median": 193.39926147460938, "p90": 507.1850982666016, "max": 929.562255859375, "pos_frac": 0.71875, "sample": [226.5391082763672, -179.23110961914062, 160.32620239257812, 205.02798461914062, -28.69415283203125, 380.467529296875, 329.2895202636719, 119.25550842285156, 41.22947692871094, -446.1654968261719, 98.86658477783203, 505.5382995605469, 507.890869140625, 346.6542663574219, 363.67779541015625, 488.42437744140625, -41.20281219482422, -81.56474304199219, -39.65826416015625, 156.75592041015625, -56.93398666381836, -29.43609619140625, 323.8504333496094, 929.562255859375, 225.607177734375, 335.1534729003906, 374.4997253417969, 158.69479370117188, 664.7785034179688, 411.7586364746094, -116.31356811523438, 205.41854858398438, 287.71087646484375, -62.77193069458008, 116.32331848144531, 141.31613159179688, 314.3697204589844, 450.8025207519531, 181.77053833007812, 178.3040008544922, -8.165632247924805, 213.52981567382812, -96.48616027832031, 250.32504272460938, -32.78339385986328, 483.40533447265625, 140.2130889892578, 242.9452362060547, 273.03753662109375, 214.3375244140625, 611.822998046875, 472.9463195800781, 54.97865295410156, -53.317230224609375, -28.361507415771484, 817.7789306640625, 838.2005615234375, 69.49566650390625, 556.4287719726562, -27.10836410522461, -15.963371276855469, 296.311279296875, 92.8853759765625, -130.53787231445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000328.npy"}
|
|
{"epoch": 0.48164464023494863, "step": 329, "batch_size": 64, "mean": 218.89981079101562, "std": 300.8843994140625, "min": -810.2685546875, "p10": -87.71152038574218, "median": 183.1309356689453, "p90": 625.0733642578125, "max": 985.7703247070312, "pos_frac": 0.796875, "sample": [139.35726928710938, 390.41326904296875, 547.9524536132812, 331.60198974609375, 339.0403747558594, -132.19691467285156, 519.4761962890625, 33.466285705566406, 521.8173217773438, 183.611572265625, -12.1883544921875, 400.3188781738281, 75.44635772705078, -67.35722351074219, 80.68388366699219, 161.94815063476562, 202.34925842285156, 182.65029907226562, 58.351287841796875, 879.60498046875, 149.1455078125, 110.78974914550781, 148.6944122314453, 131.4952392578125, -810.2685546875, 376.8743591308594, 40.441429138183594, 985.7703247070312, 672.9124755859375, -94.38038635253906, 210.709716796875, -34.71000289916992, 391.34942626953125, 80.7947006225586, 220.2851104736328, 483.5408630371094, 639.8079833984375, 383.38568115234375, 736.2340087890625, 184.43467712402344, 19.11825942993164, -72.15083312988281, -183.2757110595703, 630.7001953125, 481.0054016113281, -144.98928833007812, 303.0652160644531, -130.35206604003906, 353.3150634765625, 153.5653076171875, 213.88909912109375, -55.12178039550781, 261.10589599609375, 249.46383666992188, 78.51758575439453, 49.397666931152344, 250.7424774169922, 467.77294921875, 817.5833740234375, -57.23046875, -316.2895812988281, 153.70663452148438, 611.944091796875, 30.451087951660156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000329.npy"}
|
|
{"epoch": 0.4831130690161527, "step": 330, "batch_size": 64, "mean": 235.27944946289062, "std": 270.6396789550781, "min": -287.7521667480469, "p10": -67.60581054687499, "median": 220.24581146240234, "p90": 502.33540039062507, "max": 962.2716064453125, "pos_frac": 0.796875, "sample": [490.212890625, -64.21783447265625, -158.65933227539062, -52.52763748168945, 369.3948974609375, 482.24847412109375, 144.50608825683594, 179.45086669921875, -23.186182022094727, 58.05754852294922, -141.11729431152344, 133.03436279296875, 136.41897583007812, 444.83544921875, 507.53076171875, 44.35388946533203, 296.7886047363281, 267.0201110839844, 273.1759033203125, 449.3501892089844, -54.80274963378906, 120.40432739257812, 314.6631164550781, 70.62193298339844, 182.5186767578125, 329.0164489746094, -251.41116333007812, 368.5846252441406, 239.8097686767578, 164.57244873046875, 454.3239440917969, 3.002704620361328, -287.7521667480469, 297.1682434082031, 489.3543395996094, 200.68185424804688, 59.55561828613281, 865.247802734375, 20.112525939941406, 156.98716735839844, -13.449073791503906, 463.68890380859375, 123.21577453613281, 962.2716064453125, -240.8936004638672, 849.876220703125, 765.7274169921875, 377.4294128417969, 405.6248779296875, 89.09495544433594, 354.7568054199219, 526.6287231445312, 195.44705200195312, 744.1487426757812, -11.610633850097656, 309.3965148925781, 260.95538330078125, 456.9267578125, -153.93875122070312, 411.666748046875, 45.255859375, 311.31121826171875, 314.080810546875, -69.05780029296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000330.npy"}
|
|
{"epoch": 0.4845814977973568, "step": 331, "batch_size": 64, "mean": 227.6820831298828, "std": 201.87876892089844, "min": -174.10000610351562, "p10": 15.539501190185563, "median": 198.41211700439453, "p90": 446.2546539306641, "max": 854.3159790039062, "pos_frac": 0.90625, "sample": [164.05381774902344, 744.6502685546875, 285.5531005859375, -27.990570068359375, 279.860595703125, 386.520263671875, 52.821075439453125, 48.53318786621094, 224.60862731933594, 145.33258056640625, 727.0174560546875, 324.9669494628906, 854.3159790039062, 31.36023712158203, 173.87379455566406, 362.8977966308594, 644.5108642578125, 117.32466125488281, 146.12310791015625, 222.97622680664062, 453.9986877441406, 428.18524169921875, 466.5025634765625, -77.21974182128906, 117.22714233398438, 188.2744598388672, 193.83592224121094, 315.1023864746094, 250.16845703125, -64.03436279296875, 245.4195098876953, -110.02607727050781, 313.443115234375, 193.8112335205078, 33.30065155029297, 175.72378540039062, -174.10000610351562, 202.98831176757812, 320.72613525390625, 594.2507934570312, 268.5122375488281, 193.00648498535156, 135.9503936767578, 374.80792236328125, 91.93729400634766, 97.34423828125, 251.87725830078125, 159.4771728515625, 257.98248291015625, 72.75859069824219, -168.35357666015625, 295.52734375, 107.24372863769531, 82.37217712402344, 337.1734313964844, 354.59637451171875, 301.13201904296875, 68.9207534790039, 165.8662567138672, 372.75140380859375, 145.30503845214844, 280.3493347167969, 339.46746826171875, 8.759185791015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000331.npy"}
|
|
{"epoch": 0.48604992657856094, "step": 332, "batch_size": 64, "mean": 162.39520263671875, "std": 233.09779357910156, "min": -195.85208129882812, "p10": -106.2402458190918, "median": 121.48944854736328, "p90": 459.81515197753913, "max": 947.96484375, "pos_frac": 0.765625, "sample": [722.3790283203125, 143.10958862304688, 150.01087951660156, 305.61553955078125, 19.31005859375, 20.606430053710938, 47.92668151855469, 425.8486328125, 413.161376953125, 184.60740661621094, -43.23487854003906, 61.16053771972656, -102.310302734375, 947.96484375, 184.99118041992188, 78.34749603271484, 128.7288818359375, 131.0206756591797, 81.69428253173828, -9.95567512512207, 135.53363037109375, 86.86802673339844, 464.4305725097656, -126.34214782714844, 355.17156982421875, 6.305103302001953, -112.30062866210938, 100.61211395263672, 627.93115234375, 215.78570556640625, 146.6237335205078, 214.53091430664062, 290.0056457519531, -107.92450714111328, -56.03253936767578, 79.16570281982422, 499.9503173828125, 495.5030212402344, -129.98764038085938, 248.69569396972656, 166.40127563476562, -195.85208129882812, 319.35015869140625, 60.79588317871094, 301.7121887207031, 688.9890747070312, 114.25001525878906, 449.04583740234375, -79.97122192382812, 82.93681335449219, -194.07305908203125, 225.91799926757812, 390.5665283203125, -141.90335083007812, 379.1773681640625, 27.27884292602539, -95.90141296386719, 2.437305450439453, -71.92701721191406, 145.57452392578125, 307.20416259765625, 113.3281478881836, 111.08268737792969, -38.63690185546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000332.npy"}
|
|
{"epoch": 0.48751835535976507, "step": 333, "batch_size": 64, "mean": 215.18930053710938, "std": 298.3913879394531, "min": -333.917236328125, "p10": -182.7840606689453, "median": 203.5207290649414, "p90": 574.4554565429688, "max": 949.4278564453125, "pos_frac": 0.75, "sample": [-31.821029663085938, 294.71087646484375, 949.4278564453125, -13.494583129882812, 108.1042251586914, 57.49815368652344, 298.9273681640625, 257.5733642578125, 39.90771484375, 39.974178314208984, 745.9312133789062, -172.78494262695312, 215.63934326171875, -228.07589721679688, 542.593017578125, -270.4250793457031, 699.1069946289062, -103.18319702148438, 653.018798828125, 428.1296691894531, -4.864200592041016, 303.8964538574219, 0.0689544677734375, 350.86968994140625, 459.31512451171875, 555.1519775390625, -187.06939697265625, 111.79193115234375, 60.2728271484375, 417.7791442871094, 150.3307647705078, 2.0727672576904297, -78.5137939453125, 517.0474243164062, 558.4368896484375, 420.18212890625, -265.8327941894531, 11.230236053466797, 553.9495849609375, 446.9399108886719, 441.43707275390625, -233.0352325439453, -14.276283264160156, 496.8572082519531, 466.64373779296875, -37.176429748535156, 198.43516540527344, 160.02455139160156, 740.640869140625, 261.5515441894531, 474.10479736328125, 288.8548278808594, 581.320556640625, 158.78067016601562, 208.60629272460938, -145.91639709472656, 400.4124755859375, 3.6482887268066406, 342.5771484375, -333.917236328125, 619.1239624023438, -314.4457702636719, 14.667526245117188, 99.38301086425781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000333.npy"}
|
|
{"epoch": 0.4889867841409692, "step": 334, "batch_size": 64, "mean": 187.9039306640625, "std": 294.8891906738281, "min": -549.85791015625, "p10": -125.66009597778319, "median": 150.69841766357422, "p90": 634.0444641113282, "max": 803.1802978515625, "pos_frac": 0.796875, "sample": [605.1008911132812, 9.590057373046875, -173.42620849609375, 485.5108642578125, 232.25967407226562, 40.529151916503906, -72.72905731201172, 42.588661193847656, 243.61329650878906, 28.243316650390625, 220.6317138671875, 13.036033630371094, 358.1537780761719, 211.64715576171875, -131.7790069580078, 200.54774475097656, 43.82017517089844, 424.2208251953125, -549.85791015625, 646.4488525390625, 51.795719146728516, 224.06048583984375, -7.731475830078125, -282.257568359375, 372.58343505859375, 256.6308288574219, 460.83465576171875, -402.980712890625, -220.72682189941406, -42.24348449707031, 33.95172119140625, -28.822887420654297, 123.36448669433594, 59.49774169921875, 737.7747192382812, 803.1802978515625, 312.2433166503906, 24.89666175842285, 77.37696838378906, 499.08453369140625, 83.13146209716797, 252.5162353515625, 553.2393798828125, 410.32281494140625, -77.06819152832031, 73.00213623046875, 767.88134765625, 38.060272216796875, 770.1214599609375, 481.68194580078125, -111.38263702392578, -385.60797119140625, 32.081787109375, 260.0702819824219, 201.65199279785156, 660.7373657226562, 453.3428649902344, 115.87336730957031, 45.84654235839844, 360.881591796875, 48.78517150878906, 674.2673950195312, 178.0323486328125, 207.7198486328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000334.npy"}
|
|
{"epoch": 0.49045521292217326, "step": 335, "batch_size": 64, "mean": 149.90032958984375, "std": 259.487548828125, "min": -294.9684143066406, "p10": -196.21173858642575, "median": 114.5254898071289, "p90": 436.3501831054688, "max": 973.1768188476562, "pos_frac": 0.734375, "sample": [171.04229736328125, 353.5323486328125, 430.3994140625, 10.34686279296875, 178.9490509033203, -71.46170806884766, 95.12866973876953, 438.9005126953125, 63.587894439697266, 42.83782196044922, -219.88771057128906, 49.31004333496094, 276.1690673828125, 250.72584533691406, 24.749610900878906, 631.1160278320312, 13.358940124511719, 106.12808990478516, -238.8546905517578, 226.1151123046875, -119.41213989257812, 53.94932556152344, 189.58489990234375, 130.47433471679688, 686.5004272460938, 533.6301879882812, 389.6106872558594, 264.62213134765625, 35.83479309082031, 629.7703857421875, 973.1768188476562, -272.0169677734375, 164.03573608398438, -294.9684143066406, 397.02752685546875, -8.718942642211914, 430.1692810058594, 7.262102127075195, 73.45266723632812, 496.0570068359375, -105.51397705078125, 336.3714904785156, -168.25430297851562, 120.25177001953125, 259.519775390625, -52.624900817871094, 208.0115203857422, -24.978988647460938, -239.02908325195312, -97.05963134765625, 108.79920959472656, -129.6800079345703, 406.857177734375, -233.41552734375, -156.3428955078125, 357.70611572265625, 263.01434326171875, 422.22479248046875, 45.45545959472656, -208.19349670410156, 287.9431457519531, 336.000244140625, 192.4769287109375, 71.84722900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000335.npy"}
|
|
{"epoch": 0.4919236417033774, "step": 336, "batch_size": 64, "mean": 238.0438995361328, "std": 323.5335998535156, "min": -556.8159790039062, "p10": -94.99150238037109, "median": 223.46730041503906, "p90": 663.5703430175782, "max": 1126.6158447265625, "pos_frac": 0.84375, "sample": [330.54376220703125, 66.19939422607422, 863.3460693359375, 39.06951141357422, 433.4521484375, 237.7423095703125, 248.9287109375, -104.89192962646484, -54.121822357177734, 844.8743896484375, 260.1337890625, 381.20953369140625, 151.15615844726562, 446.9875183105469, 1126.6158447265625, -1.1123371124267578, 729.0440673828125, 388.9849548339844, 250.6155242919922, 413.08233642578125, -97.65574645996094, 77.56343078613281, 127.18418884277344, 277.39404296875, 133.3089599609375, 350.8799743652344, 115.7674560546875, 79.23773193359375, 631.5403442382812, 237.65673828125, 193.72079467773438, 63.39375305175781, 67.65788269042969, 209.27786254882812, 99.46241760253906, 87.235595703125, -449.671142578125, 677.2974853515625, -88.77493286132812, -205.63662719726562, 571.2954711914062, 22.464515686035156, 762.476318359375, 67.4349365234375, 441.445068359375, 61.699493408203125, -556.8159790039062, 92.1080551147461, 324.2490539550781, 316.7920227050781, 125.76283264160156, 72.0025405883789, 197.4818878173828, 63.672096252441406, 269.8246154785156, 535.13525390625, 455.32940673828125, 1057.78173828125, -331.7037353515625, 310.38751220703125, -436.8017578125, 374.9765319824219, 288.27630615234375, 510.83489990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000336.npy"}
|
|
{"epoch": 0.4933920704845815, "step": 337, "batch_size": 64, "mean": 234.46226501464844, "std": 255.49624633789062, "min": -239.88829040527344, "p10": -101.54704971313475, "median": 209.59927368164062, "p90": 560.5297729492188, "max": 961.7635498046875, "pos_frac": 0.8125, "sample": [887.539306640625, 300.3265380859375, -193.95375061035156, 64.85186004638672, 543.5811767578125, 462.0758972167969, -31.974586486816406, 261.9462890625, 23.373729705810547, 408.8607177734375, 36.63208770751953, 272.778564453125, 269.38409423828125, 391.87445068359375, 422.1410217285156, -92.49329376220703, 214.0556640625, 187.9770965576172, -105.42723083496094, 494.24462890625, 208.1757354736328, 204.28952026367188, -6.965127944946289, 567.79345703125, 151.9290313720703, -134.3048095703125, 961.7635498046875, 67.98291015625, -141.05648803710938, -140.493896484375, 228.57321166992188, 190.4502410888672, 256.93597412109375, -176.25393676757812, 184.9128875732422, 211.02281188964844, 458.5583801269531, 5.0055999755859375, 272.6726989746094, 370.8155517578125, -6.730091094970703, 652.0319213867188, 69.58316040039062, 189.6763916015625, -239.88829040527344, 141.43295288085938, 789.653564453125, 477.504150390625, 334.95587158203125, 78.34749603271484, 205.97726440429688, 174.90277099609375, 650.8359985351562, 307.73883056640625, 159.0486297607422, 640.42431640625, 362.50201416015625, 201.40643310546875, 92.4060287475586, 343.6380615234375, 305.1399230957031, 255.19403076171875, -56.35585403442383, 316.5577087402344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000337.npy"}
|
|
{"epoch": 0.4948604992657856, "step": 338, "batch_size": 64, "mean": 219.0503387451172, "std": 213.3196258544922, "min": -215.43643188476562, "p10": -7.122251892089844, "median": 168.7763214111328, "p90": 500.81647033691405, "max": 763.3347778320312, "pos_frac": 0.875, "sample": [96.66474914550781, 499.9928894042969, 69.79084777832031, -215.43643188476562, 219.27200317382812, 109.13389587402344, -108.31201171875, 234.4988250732422, 612.7645263671875, 250.7890625, 106.98686218261719, 49.93693923950195, -54.24267578125, 468.3801574707031, 78.13519287109375, 276.2960205078125, 30.177215576171875, 524.67822265625, 334.4981384277344, 470.0140380859375, 610.3546142578125, -14.607093811035156, 234.92791748046875, 151.2846221923828, 261.315673828125, 32.43388366699219, 422.952880859375, -7.037101745605469, -56.299659729003906, 52.32860565185547, 190.3209991455078, 493.0896301269531, 449.78765869140625, 68.178466796875, 256.37957763671875, 394.8581848144531, 180.5645751953125, 44.787052154541016, 368.5547790527344, 147.1205596923828, 405.7433776855469, 39.483184814453125, 81.80999755859375, 126.57493591308594, 497.9591064453125, 28.166170120239258, 163.81463623046875, 124.2804183959961, 363.2449951171875, 131.85984802246094, 31.35125732421875, 24.462116241455078, 517.64599609375, 763.3347778320312, -7.158744812011719, 236.32395935058594, 494.52203369140625, 173.73800659179688, 341.0850830078125, 151.1671142578125, 587.7268676757812, -153.06277465820312, 58.665374755859375, 501.16943359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000338.npy"}
|
|
{"epoch": 0.49632892804698975, "step": 339, "batch_size": 64, "mean": 216.89700317382812, "std": 231.7227325439453, "min": -236.99472045898438, "p10": -54.77608985900878, "median": 221.35562896728516, "p90": 568.3123779296876, "max": 826.9299926757812, "pos_frac": 0.828125, "sample": [71.81271362304688, -122.00794219970703, -43.27011489868164, -40.415931701660156, 591.0228271484375, 25.085403442382812, 43.29621887207031, 165.40371704101562, 335.2525634765625, 34.373023986816406, 826.9299926757812, 2.3335113525390625, 101.19735717773438, 144.58938598632812, 294.662353515625, 372.523193359375, 15.970273971557617, 16.66283416748047, 78.06705474853516, 576.0738525390625, 253.24400329589844, -6.856437683105469, 237.33749389648438, -59.70722198486328, 254.25552368164062, 153.97483825683594, 617.647216796875, 28.151473999023438, 378.9117431640625, 617.59375, -120.04861450195312, 237.93972778320312, -236.99472045898438, 143.4408416748047, 462.3583068847656, 313.43572998046875, 179.65618896484375, -6.778711318969727, 242.13180541992188, 451.1503601074219, 136.1985321044922, 222.1636505126953, 390.03936767578125, 508.8324279785156, 108.49286651611328, 550.2022705078125, -131.98114013671875, 642.6986083984375, 268.7087707519531, -166.1591033935547, 259.715576171875, 193.70724487304688, 77.64097595214844, 220.547607421875, 46.071163177490234, 256.5369873046875, 230.7949981689453, -128.23309326171875, 417.0040283203125, 368.39617919921875, 524.2067260742188, 577.556396484375, 407.0296630859375, 270.8321533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000339.npy"}
|
|
{"epoch": 0.4977973568281938, "step": 340, "batch_size": 64, "mean": 188.17184448242188, "std": 256.86224365234375, "min": -418.00311279296875, "p10": -119.0688758850097, "median": 166.60628509521484, "p90": 518.1206848144532, "max": 907.37939453125, "pos_frac": 0.78125, "sample": [176.78411865234375, 266.19964599609375, 87.8731460571289, 194.09326171875, 230.8927459716797, 465.18701171875, 593.6241455078125, -418.00311279296875, 253.696044921875, -47.729393005371094, 323.949951171875, 907.37939453125, 136.90460205078125, -237.27828979492188, 517.743896484375, -14.123497009277344, 302.15869140625, 125.03821563720703, -59.657142639160156, 94.26103210449219, 166.41151428222656, -143.58642578125, -341.6515808105469, 514.1956176757812, -164.9652557373047, 139.10891723632812, 60.89354705810547, 432.5928649902344, -45.47998046875, 308.2781982421875, 465.513671875, 45.11903381347656, 781.9313354492188, 661.4264526367188, -2.677614212036133, 129.84283447265625, 69.88165283203125, 296.1812744140625, 140.77345275878906, 194.44638061523438, 213.32095336914062, 93.872314453125, 206.55581665039062, 22.89205551147461, -154.29222106933594, 272.69976806640625, 249.2777099609375, -61.86125946044922, 518.2821655273438, 2.9474334716796875, 166.80105590820312, 439.8669738769531, 521.155029296875, 553.2645263671875, 336.4048156738281, 208.73776245117188, 115.24092102050781, 34.9914436340332, 367.99896240234375, 158.0799560546875, -264.999755859375, 383.2530212402344, -40.608642578125, 91.88762664794922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000340.npy"}
|
|
{"epoch": 0.49926578560939794, "step": 341, "batch_size": 64, "mean": 231.0950164794922, "std": 262.06024169921875, "min": -279.6058349609375, "p10": -84.45407714843749, "median": 229.64261627197266, "p90": 541.8378723144532, "max": 1159.6937255859375, "pos_frac": 0.796875, "sample": [1159.6937255859375, -98.79901123046875, -86.19203186035156, -135.2514190673828, 435.3365478515625, 553.423583984375, 704.2529296875, 494.8340148925781, 226.4256591796875, 256.9441833496094, -18.096513748168945, 198.59732055664062, 164.20877075195312, 283.62237548828125, 44.64503479003906, -55.364776611328125, -68.16377258300781, 363.62274169921875, 73.70537567138672, 285.42578125, -150.847412109375, 384.7967529296875, 414.23114013671875, 253.92910766601562, 69.91048431396484, 559.3603515625, 129.98001098632812, 233.4754638671875, -39.71690368652344, -80.39884948730469, 128.3520050048828, 453.630859375, 633.066650390625, 267.6620178222656, 275.275146484375, 401.9912414550781, 100.67909240722656, 17.16819953918457, -67.7399673461914, 422.9405517578125, 517.7894287109375, 252.4556121826172, 86.39158630371094, 93.64041900634766, -279.6058349609375, 549.5689697265625, 104.49311828613281, 232.8595733642578, -247.02731323242188, 366.78533935546875, 219.54946899414062, 257.0445251464844, 431.29632568359375, -90.6540298461914, 768.9207763671875, 40.94023132324219, 336.1639404296875, 482.2222900390625, 433.586181640625, 78.96405029296875, 185.88937377929688, 101.16390228271484, 523.7986450195312, 153.22799682617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000341.npy"}
|
|
{"epoch": 0.5007342143906021, "step": 342, "batch_size": 64, "mean": 203.21817016601562, "std": 273.95648193359375, "min": -550.9515380859375, "p10": -120.31016616821287, "median": 173.42942810058594, "p90": 507.48280029296876, "max": 1036.9178466796875, "pos_frac": 0.796875, "sample": [695.46337890625, -550.9515380859375, -220.83782958984375, 542.564697265625, 79.46652221679688, 55.00270080566406, 156.93679809570312, 340.83404541015625, 429.5384521484375, 353.37799072265625, 122.95951843261719, 109.20561218261719, 195.58531188964844, 200.0918731689453, 499.2168273925781, 292.68743896484375, 86.46717834472656, 436.4625549316406, -186.1981201171875, -35.50404357910156, 430.45343017578125, -75.83531951904297, 84.39443969726562, -130.20904541015625, -129.95980834960938, 308.4800109863281, 957.7503051757812, 373.2171325683594, 119.01402282714844, -200.6997528076172, -67.80796813964844, 1036.9178466796875, 204.58322143554688, 187.17303466796875, 10.301040649414062, -89.6178207397461, 551.1253662109375, 431.6344909667969, 602.7645263671875, 187.9269256591797, 3.060670852661133, 174.87496948242188, 361.6598205566406, 14.061187744140625, 511.0253601074219, 375.0450744628906, 161.35296630859375, 395.4219055175781, 125.90301513671875, 432.08612060546875, 437.11175537109375, 92.62223052978516, -153.97982788085938, 80.32514953613281, 403.0655822753906, -97.7943344116211, 171.98388671875, -60.216224670410156, 186.9010009765625, 127.19841003417969, 209.31423950195312, 426.78466796875, 154.63804626464844, 79.54170989990234], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000342.npy"}
|
|
{"epoch": 0.5022026431718062, "step": 343, "batch_size": 64, "mean": 243.47164916992188, "std": 224.06832885742188, "min": -376.45672607421875, "p10": 14.596405029296886, "median": 230.14461517333984, "p90": 515.7219665527344, "max": 848.1041870117188, "pos_frac": 0.921875, "sample": [372.91912841796875, 204.76675415039062, 164.8036346435547, 150.57965087890625, 267.4865417480469, 9.9315185546875, 312.9255676269531, 355.341552734375, 208.8966064453125, 723.5328369140625, 72.36958312988281, -141.4864501953125, 252.55221557617188, 25.48114013671875, 197.83255004882812, 47.246334075927734, 81.94976806640625, 237.8202362060547, 251.14999389648438, 743.2468872070312, 223.9876251220703, 331.31256103515625, 121.43745422363281, 301.3902587890625, 372.05645751953125, 337.8451232910156, 135.33592224121094, 108.35140991210938, 848.1041870117188, 167.3414764404297, 49.29212188720703, 91.84713745117188, 331.5943908691406, 115.4915771484375, 265.8660583496094, 336.4154052734375, 274.23638916015625, 97.70401000976562, 7.240108489990234, 456.5354919433594, -376.45672607421875, 320.9675598144531, 159.87094116210938, 404.0660095214844, 253.847412109375, 784.1376953125, 483.11993408203125, 290.16290283203125, 215.89596557617188, 323.6714172363281, 169.32220458984375, 368.6878967285156, 28.75128173828125, -109.92816162109375, 496.46038818359375, 523.9769287109375, 172.07386779785156, 174.5823211669922, 163.13784790039062, 236.30160522460938, -43.80957794189453, 604.179443359375, 610.6673583984375, -184.23251342773438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000343.npy"}
|
|
{"epoch": 0.5036710719530103, "step": 344, "batch_size": 64, "mean": 211.33169555664062, "std": 292.8844299316406, "min": -307.1170654296875, "p10": -38.20401611328125, "median": 173.40929412841797, "p90": 558.83583984375, "max": 1500.6175537109375, "pos_frac": 0.78125, "sample": [47.679481506347656, -77.90630340576172, -19.279739379882812, -13.259841918945312, 46.5561408996582, 58.05596923828125, 182.8887939453125, 97.2900161743164, 78.08705139160156, 260.45654296875, -1.3785991668701172, 915.5072021484375, 135.08474731445312, 264.1190185546875, 177.00621032714844, 170.990966796875, 182.3535919189453, -307.1170654296875, -276.82489013671875, 315.151123046875, 240.3619842529297, 49.69500732421875, 202.42857360839844, 5.249866485595703, 230.30853271484375, 788.845703125, 582.9253540039062, 389.4647521972656, 104.30338287353516, -38.391845703125, 391.1974792480469, 235.73388671875, 282.9595947265625, 73.0257568359375, 173.24188232421875, -37.7657470703125, 361.6986083984375, -28.8504638671875, 309.11212158203125, 25.41450309753418, 547.3577880859375, 249.19277954101562, 173.5767059326172, 47.43730926513672, 83.21874237060547, 1500.6175537109375, 472.40045166015625, 304.0062255859375, -8.625547409057617, 581.9908447265625, -11.807395935058594, 518.1363525390625, 144.425537109375, 567.9193725585938, 39.14460372924805, -182.32972717285156, -163.1377716064453, 469.67095947265625, 544.971435546875, -233.34896850585938, 288.6415710449219, 362.4056091308594, 89.18992614746094, 563.7550048828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000344.npy"}
|
|
{"epoch": 0.5051395007342144, "step": 345, "batch_size": 64, "mean": 237.87557983398438, "std": 281.6604919433594, "min": -438.1977233886719, "p10": -70.02271728515623, "median": 195.13054656982422, "p90": 629.4166015625003, "max": 944.49267578125, "pos_frac": 0.8125, "sample": [-37.22388458251953, 678.625, 82.75823974609375, 385.5531311035156, 189.64996337890625, -229.32528686523438, 719.8302612304688, -167.23304748535156, 147.48287963867188, 477.2344970703125, -29.996999740600586, 298.77020263671875, 145.68438720703125, 430.4862976074219, 556.7327270507812, 108.16140747070312, -349.2793273925781, 356.20074462890625, 53.85195541381836, 83.85755920410156, 468.8708190917969, 402.6497802734375, 99.22434997558594, 392.80645751953125, 277.124267578125, 544.6883544921875, 128.47561645507812, 334.5335388183594, 83.04339599609375, 194.8404083251953, 35.1300048828125, 847.9840087890625, 450.9012451171875, 31.534896850585938, 97.8741455078125, 449.7477111816406, 454.498291015625, 453.05743408203125, 357.9273376464844, 206.6776885986328, 412.36566162109375, 944.49267578125, 95.07797241210938, 174.5125732421875, 413.09344482421875, 256.5116882324219, -41.66218185424805, 770.86865234375, 195.42068481445312, 474.129150390625, -55.63072967529297, -126.3973159790039, 196.55111694335938, 660.5668334960938, -11.198919296264648, -151.32281494140625, -76.19071197509766, 122.75460815429688, 706.9315795898438, 201.92538452148438, 141.232177734375, 143.17413330078125, -438.1977233886719, 1.6179046630859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000345.npy"}
|
|
{"epoch": 0.5066079295154186, "step": 346, "batch_size": 64, "mean": 224.0716094970703, "std": 267.63995361328125, "min": -628.772216796875, "p10": -96.83586349487302, "median": 224.06234741210938, "p90": 570.723553466797, "max": 808.4140014648438, "pos_frac": 0.796875, "sample": [318.0055847167969, 212.56996154785156, 21.44542694091797, 307.9814453125, 666.2012939453125, 546.0035400390625, 693.8778076171875, 196.1226348876953, 137.41709899902344, 770.2504272460938, -628.772216796875, 114.23155212402344, 401.2762451171875, 67.86825561523438, 327.3682556152344, 657.296630859375, -47.62615966796875, 316.943359375, 808.4140014648438, 120.88394165039062, 378.530029296875, 178.83657836914062, -137.86837768554688, 411.39202880859375, 314.77734375, 319.9234619140625, 370.6268615722656, 260.11376953125, 452.1483154296875, 64.02566528320312, 11.154233932495117, -107.65535736083984, 226.2029266357422, 50.04896545410156, 641.2168579101562, 221.92176818847656, 220.4002227783203, 130.70742797851562, 279.8194274902344, -178.0355987548828, -20.034828186035156, 579.6929931640625, 313.05615234375, 549.7948608398438, 208.0519561767578, 376.32666015625, 392.1484375, 355.4374694824219, -20.766590118408203, -69.53240966796875, 274.2144775390625, -71.59037780761719, 71.80050659179688, -213.09646606445312, 133.24632263183594, 512.9927978515625, 133.56369018554688, 150.2696533203125, 456.0361633300781, -244.35073852539062, -34.4130859375, -173.21337890625, 269.9779968261719, 294.92425537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000346.npy"}
|
|
{"epoch": 0.5080763582966226, "step": 347, "batch_size": 64, "mean": 220.20404052734375, "std": 295.0769958496094, "min": -386.6219787597656, "p10": -121.00106887817383, "median": 176.42359924316406, "p90": 629.4262329101564, "max": 913.892578125, "pos_frac": 0.765625, "sample": [-386.6219787597656, 286.1881103515625, 427.31005859375, 360.0544738769531, 111.28551483154297, -56.023231506347656, 213.09841918945312, 281.73492431640625, 706.55078125, 21.99864959716797, 467.32098388671875, 31.565162658691406, 148.18544006347656, -118.34561157226562, 448.68792724609375, -320.04522705078125, -277.92645263671875, -118.21273803710938, 800.232177734375, 83.58824920654297, 586.5626831054688, 32.952030181884766, 177.41143798828125, 312.42620849609375, -224.22119140625, 543.10009765625, 570.186279296875, 347.55487060546875, -17.002647399902344, 714.5479736328125, 379.8228759765625, 662.5194091796875, 334.701171875, -52.226234436035156, 229.62884521484375, -122.13912200927734, 519.1170654296875, 144.55340576171875, 131.928955078125, -71.74327087402344, 175.43576049804688, 281.2651672363281, 83.27428436279297, 347.7645263671875, 145.42892456054688, 93.92373657226562, 121.87896728515625, -42.56177520751953, 913.892578125, 91.0781478881836, 379.46978759765625, 647.7963256835938, 184.0223846435547, 826.575927734375, -94.96002960205078, 37.444740295410156, 344.56439208984375, -208.76248168945312, 434.7796325683594, 2.5583343505859375, 477.6059265136719, -147.1201934814453, 139.49253845214844, 547.9046020507812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000347.npy"}
|
|
{"epoch": 0.5095447870778267, "step": 348, "batch_size": 64, "mean": 189.67080688476562, "std": 265.00286865234375, "min": -320.6519775390625, "p10": -117.59218978881833, "median": 190.4236602783203, "p90": 455.5255462646484, "max": 1138.271484375, "pos_frac": 0.75, "sample": [338.38836669921875, 77.97350311279297, 159.18441772460938, 430.7359619140625, 355.90313720703125, -58.552589416503906, 90.72510528564453, 345.592529296875, 378.133056640625, -320.6519775390625, 102.18031311035156, 61.047210693359375, 645.384521484375, 269.8117980957031, 136.6550750732422, 316.4888000488281, 13.898538589477539, 315.219482421875, 131.2049560546875, 109.54412078857422, 358.1109924316406, 296.4319763183594, -87.82384490966797, 1138.271484375, 451.6849670410156, 260.5068664550781, -65.41475677490234, 237.37542724609375, -48.39244079589844, 37.54729461669922, -82.31654357910156, -149.06581115722656, -159.38650512695312, 868.3060913085938, 599.40673828125, 349.25457763671875, 329.7330322265625, 292.8187255859375, 221.66290283203125, 4.41224479675293, -238.75172424316406, 433.225830078125, -17.61450958251953, 307.7171325683594, 271.3600158691406, 227.43789672851562, -211.31768798828125, 10.374032974243164, 553.7444458007812, 312.71337890625, 108.72306060791016, -37.15827941894531, 457.1715087890625, -262.86700439453125, 360.37652587890625, 229.36424255371094, 97.61206817626953, -2.6372337341308594, 29.400604248046875, 533.23974609375, 90.89694213867188, 330.6854248046875, -130.3500518798828, -66.40365600585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000348.npy"}
|
|
{"epoch": 0.5110132158590308, "step": 349, "batch_size": 64, "mean": 216.3872528076172, "std": 311.50262451171875, "min": -523.735107421875, "p10": -100.53388519287107, "median": 154.6573486328125, "p90": 580.7486206054688, "max": 1232.559814453125, "pos_frac": 0.78125, "sample": [137.12603759765625, -359.4029541015625, 437.4070129394531, 125.3019027709961, 86.58535766601562, 149.15505981445312, 486.3140869140625, -109.9931869506836, 237.52841186523438, 412.0101623535156, 191.59115600585938, 510.248046875, 111.41211700439453, -54.414390563964844, 40.407012939453125, 16.138906478881836, -3.9861507415771484, 21.80406951904297, 248.95761108398438, -196.24606323242188, 536.9299926757812, 372.47674560546875, 383.6249084472656, 20.393735885620117, 341.49755859375, 15.297222137451172, 484.679443359375, -78.4621810913086, 121.42717742919922, 182.12210083007812, 790.5144653320312, 33.33519744873047, -35.0174674987793, 567.7794799804688, 200.80563354492188, -523.735107421875, -151.14950561523438, 115.63324737548828, 601.3690185546875, 41.070743560791016, 744.849365234375, -5.056243896484375, -279.2729797363281, -68.75862121582031, 559.733154296875, 655.486572265625, 909.446533203125, 183.91184997558594, 586.3068237304688, 160.15963745117188, 436.96923828125, 460.36810302734375, -15.088081359863281, 357.8921813964844, 287.48211669921875, 297.2568054199219, -220.86648559570312, 55.66184997558594, 15.562236785888672, 485.2790222167969, 404.05133056640625, 43.734649658203125, 52.57891845703125, 1232.559814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000349.npy"}
|
|
{"epoch": 0.5124816446402349, "step": 350, "batch_size": 64, "mean": 209.05242919921875, "std": 289.9720153808594, "min": -606.6255493164062, "p10": -171.00472869873047, "median": 198.8621368408203, "p90": 532.5513977050783, "max": 943.2975463867188, "pos_frac": 0.828125, "sample": [817.62451171875, -384.323486328125, 106.91522979736328, 420.8197937011719, 321.0352783203125, 178.2570037841797, 471.57025146484375, 89.92990112304688, -144.78431701660156, 126.17240905761719, 107.55630493164062, 454.7051696777344, -420.18212890625, 161.2045440673828, 200.4522705078125, 5.723663330078125, 680.6370849609375, 465.087890625, 943.2975463867188, 290.3802795410156, 186.89495849609375, 267.76812744140625, -43.57250213623047, 467.8252258300781, 253.98956298828125, 709.6295166015625, 145.32177734375, -194.922607421875, 156.44625854492188, 49.82752227783203, 554.6517333984375, 106.29150390625, 175.26019287109375, 197.27200317382812, 322.7903137207031, 292.68841552734375, 146.55636596679688, 286.77301025390625, -251.50067138671875, 650.4625244140625, 187.7472381591797, 480.98394775390625, 366.4393310546875, -173.68212890625, 179.5970458984375, 234.04893493652344, 602.8392944335938, 424.4551086425781, 215.26296997070312, 322.40582275390625, -606.6255493164062, 419.46112060546875, 98.59159851074219, 246.74600219726562, 47.94903564453125, 230.2664794921875, -335.21246337890625, -91.2584228515625, 22.963523864746094, 183.75576782226562, 427.265625, 234.87667846679688, -164.75746154785156, 452.70257568359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000350.npy"}
|
|
{"epoch": 0.5139500734214391, "step": 351, "batch_size": 64, "mean": 252.74497985839844, "std": 318.532470703125, "min": -358.80255126953125, "p10": -105.29508819580073, "median": 224.76630401611328, "p90": 685.8322692871094, "max": 1172.0994873046875, "pos_frac": 0.765625, "sample": [84.82916259765625, 313.55096435546875, 537.9082641601562, 1054.3948974609375, 1172.0994873046875, 168.54115295410156, 403.1637878417969, 112.1473388671875, 272.60052490234375, 302.28985595703125, 50.23004150390625, 229.642333984375, 688.1254272460938, 810.4240112304688, 345.09429931640625, 375.0956726074219, 513.2069091796875, 616.5144653320312, 680.4815673828125, 5.137353897094727, -288.2640075683594, 390.0844421386719, 96.01425170898438, 200.08560180664062, 196.83999633789062, 359.0082702636719, 493.03094482421875, 228.8745574951172, -358.80255126953125, -50.02442932128906, -295.4675598144531, 163.55194091796875, -178.90069580078125, 96.83873748779297, -0.787322998046875, 865.1365966796875, 26.899497985839844, 130.07388305664062, 227.1419677734375, -0.6306533813476562, -128.98251342773438, 392.9728698730469, -18.657543182373047, 702.2907104492188, 88.62435913085938, -17.3868350982666, 222.39064025878906, -6.146820068359375, 20.03692626953125, -189.00611877441406, -35.627235412597656, 670.9004516601562, 192.01585388183594, 409.78216552734375, 734.3846435546875, -180.6837615966797, 301.40716552734375, 677.993896484375, 304.35845947265625, 285.9725036621094, 156.69053649902344, 371.454833984375, 229.0377960205078, -44.32494354248047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000351.npy"}
|
|
{"epoch": 0.5154185022026432, "step": 352, "batch_size": 64, "mean": 154.27169799804688, "std": 277.7617492675781, "min": -630.897705078125, "p10": -142.59793548583983, "median": 124.16315841674805, "p90": 492.4511657714844, "max": 964.2426147460938, "pos_frac": 0.6875, "sample": [370.6941833496094, 378.0517578125, 151.44464111328125, 477.8457336425781, -346.3918151855469, 113.37186431884766, 56.386268615722656, 428.7138671875, 83.61796569824219, 231.2806396484375, 640.8895263671875, 494.2449951171875, -57.78745651245117, 63.89427947998047, 224.33909606933594, -115.36402893066406, -84.7789306640625, 964.2426147460938, 290.1365051269531, -148.78744506835938, 175.8382568359375, 708.4384765625, 140.12278747558594, 423.6473388671875, 66.97633361816406, -103.56787872314453, -91.70303344726562, -264.99713134765625, 8.656471252441406, 109.94244384765625, -13.889801025390625, 488.26556396484375, 17.330535888671875, 29.725303649902344, 580.6672973632812, 129.6619873046875, -630.897705078125, 67.4289321899414, 230.869140625, 377.2275695800781, 223.09059143066406, 172.11764526367188, 173.6696319580078, -203.58670043945312, -71.49119567871094, -31.944366455078125, -183.63101196289062, 582.09912109375, 471.637451171875, -162.0402069091797, 266.2066650390625, -65.2603988647461, -128.15574645996094, 453.097900390625, 266.22845458984375, 212.24136352539062, 394.96063232421875, -19.17156219482422, 572.4995727539062, 118.6643295288086, -22.704925537109375, -100.12675476074219, 208.016357421875, 81.18502807617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000352.npy"}
|
|
{"epoch": 0.5168869309838473, "step": 353, "batch_size": 64, "mean": 202.0507354736328, "std": 280.7708740234375, "min": -459.3804016113281, "p10": -83.51435012817383, "median": 142.86984252929688, "p90": 598.4275512695312, "max": 876.1688232421875, "pos_frac": 0.703125, "sample": [348.3616943359375, 380.8893737792969, 41.54857635498047, 464.9344482421875, 187.83377075195312, 439.531982421875, 47.42170715332031, -82.53789520263672, 576.3453369140625, 167.672119140625, 224.80093383789062, -42.96881866455078, 268.76171875, 572.4124145507812, 17.081939697265625, 418.093505859375, -112.61424255371094, -12.7978515625, 589.0533447265625, 706.4724731445312, 138.0071563720703, 286.7730407714844, -41.11777114868164, 103.29376220703125, -459.3804016113281, 412.7105712890625, -157.55084228515625, -51.27198028564453, -69.94269561767578, 269.354248046875, -15.556440353393555, 147.73252868652344, 322.8925476074219, 319.98297119140625, 43.623779296875, -117.28005981445312, 51.423797607421875, -152.59707641601562, 779.6221923828125, 1.7158050537109375, -54.57122802734375, 642.0028076171875, -57.959999084472656, 58.17304229736328, 119.04730987548828, 293.5189208984375, 334.38787841796875, 136.09117126464844, -39.795806884765625, 602.445068359375, 247.91897583007812, -68.37518310546875, 577.5819091796875, 385.9265441894531, 876.1688232421875, 792.021728515625, -193.91148376464844, 375.3765563964844, 622.46728515625, -83.93283081054688, 108.4616928100586, 41.098506927490234, -45.2855224609375, 249.65927124023438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000353.npy"}
|
|
{"epoch": 0.5183553597650514, "step": 354, "batch_size": 64, "mean": 227.90103149414062, "std": 227.87921142578125, "min": -145.05081176757812, "p10": -43.271263885498016, "median": 201.76913452148438, "p90": 482.2512115478516, "max": 935.43408203125, "pos_frac": 0.859375, "sample": [303.9041748046875, 485.113525390625, 329.4228515625, 34.052398681640625, 442.6803283691406, 201.85845947265625, 641.0870361328125, -81.32365417480469, 260.48431396484375, -105.86961364746094, -14.361396789550781, -142.53280639648438, 204.14720153808594, 935.43408203125, 460.7271423339844, 91.84041595458984, 18.395668029785156, 384.0247497558594, 437.5292053222656, 329.2491149902344, 16.639625549316406, 391.8421325683594, 148.81564331054688, 121.32876586914062, -137.63919067382812, 134.648681640625, 377.4793701171875, 193.82936096191406, 96.77220916748047, 345.28497314453125, 500.20306396484375, -132.29627990722656, 324.9912414550781, 259.6898193359375, 149.96292114257812, 177.91493225097656, -145.05081176757812, 475.5724792480469, 201.6798095703125, 29.455276489257812, 460.6644592285156, 15.913917541503906, 355.80548095703125, 202.935302734375, 81.34473419189453, 325.41015625, 91.0862045288086, 702.3811645507812, 393.7362060546875, -15.763275146484375, 18.07961082458496, 427.31451416015625, 404.11578369140625, 104.84780883789062, -55.060401916503906, 679.0970458984375, 40.14147186279297, 349.69677734375, 358.0356140136719, 97.47868347167969, 535.8135375976562, 111.83384704589844, 136.13705444335938, 17.63247299194336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000354.npy"}
|
|
{"epoch": 0.5198237885462555, "step": 355, "batch_size": 64, "mean": 225.7357177734375, "std": 229.86846923828125, "min": -324.9645080566406, "p10": -30.32895526885985, "median": 210.5216522216797, "p90": 522.3421386718751, "max": 914.073974609375, "pos_frac": 0.828125, "sample": [139.15357971191406, 526.388916015625, 365.9240417480469, 512.899658203125, 69.18965911865234, 252.77101135253906, 250.92950439453125, 914.073974609375, -321.79095458984375, 255.42137145996094, 242.43551635742188, 593.9324951171875, -4.978706359863281, -17.74593162536621, 431.3343505859375, 584.7337036132812, -78.9520263671875, 546.7650756835938, 154.6210479736328, 130.48574829101562, 408.04290771484375, 256.9345703125, 0.8321990966796875, 1.0284233093261719, 378.05450439453125, 501.0787353515625, 461.6810302734375, 288.279052734375, 168.57447814941406, -3.6902618408203125, -70.78656005859375, 216.634521484375, 44.93061828613281, -324.9645080566406, 237.19473266601562, -136.9355010986328, 339.9219970703125, 579.5394897460938, 302.3478088378906, 670.2696533203125, 87.5799331665039, 185.2608642578125, 112.57106018066406, 167.88232421875, 179.46231079101562, 277.1622314453125, 111.51177215576172, 444.3353576660156, 89.34576416015625, 207.19430541992188, 475.07489013671875, 420.1667785644531, 292.9024658203125, 205.49191284179688, 109.75971984863281, -35.7216796875, 132.7085723876953, -48.595821380615234, 213.8489990234375, -7.652912139892578, 87.96456909179688, 377.8345947265625, 333.87835693359375, 160.56011962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000355.npy"}
|
|
{"epoch": 0.5212922173274597, "step": 356, "batch_size": 64, "mean": 123.73868560791016, "std": 296.9876403808594, "min": -580.0505981445312, "p10": -233.57044830322263, "median": 112.79378890991211, "p90": 496.9706512451173, "max": 944.3052978515625, "pos_frac": 0.671875, "sample": [247.8245849609375, 38.892906188964844, 342.1179504394531, 77.1993637084961, 0.4556694030761719, -92.46987915039062, -116.64532470703125, 198.10287475585938, -116.01872253417969, 247.1649169921875, 292.59552001953125, 0.8722343444824219, 731.5053100585938, 463.7488708496094, -126.22630310058594, 610.2012939453125, -155.7764129638672, -188.0206756591797, -580.0505981445312, -244.06411743164062, 44.320167541503906, 784.2017211914062, -72.33659362792969, 120.44158935546875, 324.3758544921875, 511.20855712890625, -105.5578842163086, 394.5140075683594, -333.4460754394531, 70.10674285888672, -276.99267578125, 653.996337890625, 113.95576477050781, -281.1777648925781, 388.2528076171875, 114.43315124511719, 253.36907958984375, 345.93792724609375, -209.08522033691406, 51.51765441894531, -96.13417053222656, -102.7268295288086, 173.7303466796875, 223.7396240234375, 8.102838516235352, 281.22052001953125, 292.5920715332031, -192.0137176513672, 677.5839233398438, 220.70767211914062, 194.97613525390625, -2.279754638671875, -301.8314208984375, -5.693000793457031, 239.807373046875, 111.6318130493164, 126.17820739746094, 282.2066650390625, -376.0046691894531, 944.3052978515625, 377.60308837890625, 251.4455108642578, 8.229881286621094, 58.453773498535156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000356.npy"}
|
|
{"epoch": 0.5227606461086637, "step": 357, "batch_size": 64, "mean": 202.65982055664062, "std": 245.2743377685547, "min": -608.938232421875, "p10": -39.72139568328856, "median": 159.15419006347656, "p90": 559.5482910156251, "max": 772.615478515625, "pos_frac": 0.859375, "sample": [380.36004638671875, 571.0425415039062, 66.41859436035156, 634.849609375, 541.8399658203125, 46.29283142089844, 67.62269592285156, 414.825927734375, 143.15548706054688, 528.05517578125, 251.53074645996094, 102.11144256591797, -168.59317016601562, 569.9592895507812, -57.425079345703125, 140.16738891601562, 155.84024047851562, 162.4681396484375, 274.545654296875, -95.65797424316406, 729.0716552734375, 3.0835800170898438, 295.7194519042969, -9.240676879882812, -608.938232421875, 48.822994232177734, 314.32830810546875, 125.65868377685547, -31.41343116760254, 567.1375732421875, 306.3926086425781, -126.20734405517578, 487.6840515136719, -116.04428100585938, 379.9537353515625, 0.22712326049804688, 362.8873596191406, 772.615478515625, 286.93756103515625, 276.28338623046875, 7.388824462890625, 7.361181259155273, 295.2706298828125, 486.7042236328125, 52.89679718017578, 77.74171447753906, 53.99192810058594, 279.20147705078125, 18.550350189208984, 200.85067749023438, 5.148927688598633, 269.9248046875, 176.28433227539062, 7.7466888427734375, 303.01336669921875, 318.5137634277344, 117.74662780761719, 314.24395751953125, -43.281951904296875, 245.5934600830078, 112.61222839355469, 676.28564453125, 39.9957275390625, 152.07363891601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000357.npy"}
|
|
{"epoch": 0.5242290748898678, "step": 358, "batch_size": 64, "mean": 192.69509887695312, "std": 250.69949340820312, "min": -390.21484375, "p10": -53.252734374999996, "median": 148.53872680664062, "p90": 494.21686401367197, "max": 883.833251953125, "pos_frac": 0.78125, "sample": [63.070220947265625, 645.0127563476562, 63.45201110839844, 263.0637512207031, 264.46881103515625, 345.68218994140625, 76.44224548339844, 289.3414306640625, 109.43359375, 45.59559631347656, 3.5593795776367188, -47.21258544921875, -113.39065551757812, -24.274551391601562, -3.453157424926758, 757.711669921875, -43.499794006347656, 470.55413818359375, 42.997222900390625, 117.31942749023438, 32.94768524169922, -2.5168609619140625, 19.18572235107422, 70.34244537353516, 158.16357421875, -390.21484375, 349.3166809082031, -145.04852294921875, 883.833251953125, 415.435791015625, 463.92138671875, 213.97006225585938, 304.298828125, -32.822975158691406, 593.9525146484375, 201.64190673828125, 5.448127746582031, 101.61614990234375, -95.15441131591797, 332.6430969238281, 307.674072265625, 406.8035583496094, -179.99136352539062, 315.89202880859375, 406.31231689453125, -55.84136962890625, 370.5783386230469, 695.5030517578125, -12.372661590576172, 111.47001647949219, 365.20623779296875, 163.90219116210938, 80.64517211914062, 13.786796569824219, 504.3580322265625, 862.4113159179688, 72.08257293701172, 219.37771606445312, 263.06982421875, -102.16201782226562, 194.40049743652344, 156.39395141601562, 225.4694366455078, 140.68350219726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000358.npy"}
|
|
{"epoch": 0.5256975036710719, "step": 359, "batch_size": 64, "mean": 224.2485809326172, "std": 288.9275817871094, "min": -376.64849853515625, "p10": -124.15082244873045, "median": 176.7684783935547, "p90": 604.7479980468751, "max": 978.936279296875, "pos_frac": 0.765625, "sample": [-376.64849853515625, 239.898681640625, 182.69174194335938, 434.6165466308594, -133.59750366210938, 711.412109375, -213.62673950195312, 892.0838012695312, 113.41817474365234, -83.37483215332031, -149.9470977783203, -65.73197174072266, 380.0054016113281, 328.7365417480469, -179.57974243164062, 53.82972717285156, 389.5958251953125, -83.03729248046875, 501.70318603515625, -158.86233520507812, 279.8422546386719, 234.88815307617188, 532.326171875, 82.7417221069336, 10.647907257080078, 135.4964141845703, -42.265708923339844, 528.048095703125, 79.904296875, 46.35448455810547, 469.79052734375, 292.40350341796875, 978.936279296875, 449.45294189453125, -102.10856628417969, 58.428260803222656, 199.586181640625, 102.8189926147461, -154.3475341796875, 685.9373779296875, 135.4988250732422, 511.0741271972656, 252.70274353027344, 376.55694580078125, -23.578094482421875, 165.28038024902344, 503.2309265136719, 247.60040283203125, 131.65621948242188, 723.3580322265625, 53.12566375732422, -22.652145385742188, -99.77598571777344, 307.625244140625, 215.0445556640625, 614.1470947265625, 44.662841796875, 214.04469299316406, 781.8333740234375, 527.3495483398438, 122.27246856689453, 582.8167724609375, 164.7219696044922, 170.84521484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000359.npy"}
|
|
{"epoch": 0.527165932452276, "step": 360, "batch_size": 64, "mean": 172.14373779296875, "std": 206.3827667236328, "min": -165.85263061523438, "p10": -75.5314552307129, "median": 147.30007934570312, "p90": 446.4451385498048, "max": 652.3556518554688, "pos_frac": 0.796875, "sample": [54.903045654296875, 515.3936767578125, 4.463859558105469, 122.1954345703125, -35.88390350341797, 343.6920166015625, 58.018959045410156, 185.69900512695312, 386.2077941894531, 151.7951202392578, 149.0478515625, 511.9936218261719, -116.77891540527344, 323.84149169921875, 652.3556518554688, 111.66590881347656, 39.1193733215332, 46.96711730957031, 302.48297119140625, 96.97561645507812, 367.1550598144531, 195.0708465576172, -66.3033676147461, 250.0842742919922, 29.68056869506836, 342.09661865234375, 411.16595458984375, 80.46354675292969, 0.5912055969238281, 461.5647888183594, 533.4970703125, 366.251953125, 512.255126953125, -76.88313293457031, 55.982139587402344, 150.3088836669922, 205.9949951171875, 117.27981567382812, 395.8818359375, -52.647491455078125, 70.13197326660156, 607.05322265625, -142.64022827148438, -115.90182495117188, -84.29293823242188, 156.69827270507812, -165.85263061523438, 154.1070098876953, 13.372003555297852, -142.50221252441406, -18.810028076171875, 325.29473876953125, 17.757299423217773, 388.4170837402344, 409.719970703125, 260.5455322265625, 243.25914001464844, -72.3775405883789, 145.55230712890625, 403.8544006347656, 80.45936584472656, -72.37284088134766, 18.951622009277344, 353.1300964355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000360.npy"}
|
|
{"epoch": 0.5286343612334802, "step": 361, "batch_size": 64, "mean": 176.93666076660156, "std": 258.85650634765625, "min": -433.2300109863281, "p10": -88.87191009521484, "median": 193.77503967285156, "p90": 530.8234741210938, "max": 786.9251708984375, "pos_frac": 0.6875, "sample": [30.74092674255371, 209.01016235351562, 19.526012420654297, 366.79327392578125, -65.8343276977539, -303.2352600097656, -47.500282287597656, 195.66500854492188, 526.5777587890625, 141.11607360839844, 196.56472778320312, 243.70339965820312, -433.2300109863281, 415.43218994140625, -42.69136047363281, -76.56024932861328, 191.88507080078125, 151.9694366455078, -310.826416015625, -7.133241653442383, 208.9725341796875, -88.73604583740234, -7.351255416870117, -18.859905242919922, 226.91600036621094, 747.2235107421875, 351.38592529296875, 439.78045654296875, 113.40618896484375, 435.11865234375, -76.89360046386719, -37.39077377319336, 215.52626037597656, 199.90225219726562, 480.04083251953125, 547.3516845703125, -116.24891662597656, 786.9251708984375, 687.1710815429688, 525.7738647460938, 179.48667907714844, 202.28440856933594, 532.64306640625, 392.65869140625, 166.14743041992188, 196.82827758789062, 100.72325897216797, 212.32449340820312, -53.41302490234375, -88.93013763427734, 51.46062469482422, -62.13437271118164, 551.1358032226562, -7.818153381347656, 349.1455078125, 276.28240966796875, 148.70318603515625, -134.2550048828125, 262.4823913574219, 50.37278747558594, 244.24359130859375, -245.28329467773438, 557.1740112304688, 419.69757080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000361.npy"}
|
|
{"epoch": 0.5301027900146843, "step": 362, "batch_size": 64, "mean": 276.5138854980469, "std": 268.0620422363281, "min": -169.37771606445312, "p10": -28.023215484619133, "median": 232.1100616455078, "p90": 620.218798828125, "max": 1302.0435791015625, "pos_frac": 0.859375, "sample": [110.737548828125, 152.47161865234375, 528.31396484375, 578.9345703125, 294.3116149902344, 10.177558898925781, 1302.0435791015625, 269.90399169921875, 141.59515380859375, 654.5501708984375, 504.1483459472656, 123.12967681884766, 61.556983947753906, 156.31350708007812, 263.734130859375, 462.7675476074219, 152.8663787841797, -30.921911239624023, 105.19000244140625, 376.9049072265625, 480.44287109375, 100.67453002929688, 20.2231502532959, -65.43753051757812, -12.778610229492188, -21.259592056274414, 150.42599487304688, 223.38539123535156, -114.30321502685547, 693.7906494140625, 589.7765502929688, 114.54920959472656, -86.955810546875, 256.9780578613281, 265.5118408203125, 178.62855529785156, 609.1427001953125, -48.105499267578125, 214.2425079345703, 624.9656982421875, 240.83473205566406, 108.57522583007812, 718.568115234375, 478.8663024902344, 152.86489868164062, 151.6644287109375, 395.29229736328125, 181.16073608398438, 70.45863342285156, 776.7134399414062, 242.22018432617188, 762.1973266601562, 475.0221862792969, 377.469482421875, 282.76861572265625, 462.4358825683594, 513.1502075195312, -93.10614013671875, 191.74195861816406, 368.9721984863281, 283.1351318359375, 325.4558410644531, 7.178558349609375, -169.37771606445312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000362.npy"}
|
|
{"epoch": 0.5315712187958884, "step": 363, "batch_size": 64, "mean": 278.2501220703125, "std": 272.1302490234375, "min": -262.06915283203125, "p10": -29.339080047607414, "median": 258.8816375732422, "p90": 643.0626098632813, "max": 1288.8670654296875, "pos_frac": 0.859375, "sample": [-262.06915283203125, 365.2354431152344, 579.8530883789062, 243.16433715820312, 115.56925964355469, 217.63720703125, 294.52093505859375, 257.99237060546875, 261.206298828125, 153.3634490966797, 360.6451416015625, 259.38787841796875, 360.29205322265625, -7.656852722167969, 363.8011474609375, 863.813720703125, -33.201202392578125, 623.6043701171875, 83.20774841308594, 703.3868408203125, 456.8634948730469, 60.55364227294922, 308.39422607421875, 391.138427734375, 331.4158630371094, 418.28533935546875, 1288.8670654296875, 514.2841796875, -64.4935302734375, 605.6915893554688, 153.67189025878906, 230.61537170410156, 18.550994873046875, 263.45587158203125, 450.40936279296875, 50.24793243408203, 226.93783569335938, -38.54632568359375, 651.40185546875, 463.26959228515625, 412.76617431640625, -127.49483489990234, 40.904930114746094, 420.62603759765625, 36.579566955566406, 408.62213134765625, 679.5469360351562, 4.61785888671875, 252.71197509765625, 394.9355163574219, -36.095924377441406, 135.87646484375, -108.5083236694336, 416.4530029296875, 258.3753967285156, 21.32807159423828, 17.64550018310547, 229.90328979492188, 46.78921127319336, 29.89289093017578, 694.102294921875, 691.7026977539062, -20.32746124267578, 322.285400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000363.npy"}
|
|
{"epoch": 0.5330396475770925, "step": 364, "batch_size": 64, "mean": 206.77392578125, "std": 282.0346984863281, "min": -514.3486938476562, "p10": -87.62141647338866, "median": 203.9774627685547, "p90": 519.6166381835938, "max": 930.427490234375, "pos_frac": 0.796875, "sample": [431.2198791503906, 239.88572692871094, -225.3948211669922, 37.329734802246094, -514.3486938476562, 589.436279296875, -236.06350708007812, 373.32208251953125, 229.19589233398438, 905.2470092773438, 207.8909149169922, 633.9574584960938, 440.048583984375, 353.8331298828125, -60.91413116455078, 71.03593444824219, 54.95143127441406, -288.1734619140625, 323.4681091308594, -46.589599609375, -92.98699951171875, 416.90264892578125, 223.98892211914062, 426.2620849609375, 313.910400390625, 21.863954544067383, 5.579494476318359, 507.68048095703125, 200.0640106201172, 294.11810302734375, -27.962265014648438, 706.29296875, 124.7328872680664, 15.406585693359375, 113.47724151611328, 91.88949584960938, 459.4336242675781, 328.5985412597656, 289.031494140625, -123.39804077148438, 113.04235076904297, 515.8560180664062, 695.4214477539062, 107.33741760253906, 326.42706298828125, -25.96076202392578, -4.466531753540039, 386.14154052734375, 521.2283325195312, 360.6866149902344, 12.744487762451172, 187.37588500976562, 43.073211669921875, 167.21957397460938, 930.427490234375, 62.98291015625, 308.35260009765625, 303.5256042480469, -75.10172271728516, 257.5251159667969, 23.338897705078125, 151.05810546875, 463.615234375, -412.5424499511719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000364.npy"}
|
|
{"epoch": 0.5345080763582967, "step": 365, "batch_size": 64, "mean": 221.8110809326172, "std": 262.0715026855469, "min": -225.78330993652344, "p10": -74.76783447265622, "median": 167.2810821533203, "p90": 515.403973388672, "max": 1251.3406982421875, "pos_frac": 0.84375, "sample": [170.73336791992188, 130.18917846679688, 140.92694091796875, 125.50338745117188, 106.0483169555664, 137.34796142578125, 354.2581787109375, 117.94098663330078, 259.1622009277344, 169.29263305664062, 308.56182861328125, 485.62432861328125, -86.61038208007812, 224.08792114257812, 72.68067932128906, -47.135223388671875, 88.78657531738281, 707.70947265625, 559.427490234375, 295.7680358886719, 148.02728271484375, 72.65648651123047, 84.17623138427734, 18.890960693359375, 242.3701934814453, -117.1776123046875, 90.00749206542969, 34.73410415649414, 204.09262084960938, 165.26953125, -118.63117980957031, -36.73672103881836, 118.60968780517578, 40.541385650634766, 432.370361328125, 663.0087280273438, 438.90972900390625, 67.35432434082031, -107.79083251953125, 501.09930419921875, 286.036865234375, -200.81814575195312, 232.22560119628906, 8.083658218383789, 14.485326766967773, 287.5585632324219, 265.3354187011719, 286.2047119140625, 373.26141357421875, -144.49017333984375, 59.341468811035156, 402.55120849609375, -1.529500961303711, 279.87603759765625, 274.93194580078125, 329.1527404785156, 1251.3406982421875, 521.5345458984375, 490.8909912109375, -225.78330993652344, 799.867431640625, 787.3084716796875, 127.66996765136719, 428.78814697265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000365.npy"}
|
|
{"epoch": 0.5359765051395007, "step": 366, "batch_size": 64, "mean": 222.82901000976562, "std": 290.5567626953125, "min": -334.91253662109375, "p10": -131.32156829833983, "median": 199.9437255859375, "p90": 603.9563598632812, "max": 975.8141479492188, "pos_frac": 0.78125, "sample": [-178.3370361328125, 207.57061767578125, 306.7945251464844, 110.91847229003906, -309.0106201171875, 35.44599914550781, -104.25027465820312, 67.15875244140625, -127.3450927734375, 479.43670654296875, 74.71569061279297, -221.37179565429688, 486.7572937011719, 662.3225708007812, 212.7114715576172, -91.12440490722656, 344.6890563964844, 25.83391571044922, 164.59288024902344, -78.07856750488281, 324.5382995605469, 595.4917602539062, 379.8739318847656, 16.031082153320312, 607.5840454101562, 975.8141479492188, 363.3875732421875, 122.31380462646484, 581.163818359375, 192.31683349609375, 183.58596801757812, -76.76028442382812, -234.04824829101562, 660.2659301757812, 361.21966552734375, 365.409912109375, 846.1091918945312, 351.28948974609375, 231.98895263671875, 141.19325256347656, 243.66763305664062, 148.9573974609375, 86.23675537109375, -172.13897705078125, 539.3721313476562, -133.02577209472656, 325.461181640625, 65.84996032714844, 536.261962890625, 593.3414306640625, 384.38134765625, 304.88653564453125, 734.4830322265625, 331.1398010253906, 735.8528442382812, 249.11807250976562, -69.71017456054688, 29.202072143554688, -101.2818603515625, 138.50289916992188, 115.0107650756836, 134.87762451171875, 317.3227233886719, -334.91253662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000366.npy"}
|
|
{"epoch": 0.5374449339207048, "step": 367, "batch_size": 64, "mean": 186.90333557128906, "std": 238.9384002685547, "min": -500.6032409667969, "p10": -73.23221511840819, "median": 140.16596221923828, "p90": 502.54587097167973, "max": 799.8734130859375, "pos_frac": 0.8125, "sample": [45.76017761230469, 122.1705322265625, 462.0465087890625, 71.693603515625, 627.0469970703125, 10.351438522338867, 475.13458251953125, 131.09890747070312, 509.04376220703125, 71.6525650024414, 27.509048461914062, 14.81260871887207, -65.28839111328125, 519.7479858398438, 457.6923828125, 245.1343994140625, 420.48956298828125, 129.42050170898438, 226.6813507080078, -76.63671112060547, 304.1075744628906, 412.2551574707031, -36.46479034423828, 3.9834823608398438, 332.3192443847656, 289.8069763183594, 799.8734130859375, -54.22154235839844, 566.9968872070312, 90.79644775390625, 411.4115905761719, 187.19593811035156, 11.53592300415039, 326.63458251953125, 64.42634582519531, 230.19482421875, 316.73223876953125, 333.42132568359375, 417.55914306640625, 91.06294250488281, 161.29464721679688, -235.39743041992188, -202.2199249267578, -78.074462890625, -97.39697265625, 70.36080169677734, 87.20132446289062, 405.8268737792969, 144.90406799316406, 54.732818603515625, -225.31716918945312, -500.6032409667969, 135.4278564453125, 122.92935180664062, 267.305419921875, 348.0206298828125, 487.3841247558594, 315.3201904296875, 525.0726928710938, 13.374801635742188, 150.05091857910156, -7.058799743652344, 512.127685546875, -18.642578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000367.npy"}
|
|
{"epoch": 0.5389133627019089, "step": 368, "batch_size": 64, "mean": 286.42449951171875, "std": 226.30055236816406, "min": -225.81858825683594, "p10": 4.588896942138673, "median": 300.4633026123047, "p90": 556.7817382812501, "max": 1036.411865234375, "pos_frac": 0.921875, "sample": [299.79833984375, 376.8575134277344, 219.18345642089844, 490.2948913574219, 391.98309326171875, 581.4668579101562, 178.17755126953125, 27.887680053710938, 532.0224609375, 206.04360961914062, 3.4133644104003906, 415.3477783203125, 477.45611572265625, 4.081264495849609, 301.1282653808594, 344.2088317871094, 289.4712829589844, 190.05430603027344, -72.17029571533203, 430.7484436035156, 206.87979125976562, 259.39691162109375, 597.436767578125, 335.478515625, 349.3165283203125, 125.32705688476562, 397.00152587890625, 5.773372650146484, 67.71600341796875, 40.672874450683594, 157.28013610839844, 470.3797607421875, 383.0622253417969, 382.9200439453125, 394.3975830078125, 563.0296630859375, 395.37103271484375, 18.688905715942383, 182.23277282714844, -41.269493103027344, 319.39044189453125, 542.2032470703125, 474.6313781738281, 601.6947021484375, 143.2366180419922, 320.8206787109375, 666.2593383789062, 21.263198852539062, -225.81858825683594, 99.46978759765625, 105.53351593017578, 511.1685485839844, 466.3869323730469, 505.8788757324219, 358.5680236816406, 18.259153366088867, 209.16848754882812, 1036.411865234375, 160.3609619140625, 223.3074951171875, 290.420654296875, -27.607315063476562, -85.12040710449219, 616.7339477539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000368.npy"}
|
|
{"epoch": 0.540381791483113, "step": 369, "batch_size": 64, "mean": 144.11106872558594, "std": 285.2254943847656, "min": -662.96337890625, "p10": -185.36820220947263, "median": 127.80835723876953, "p90": 520.0304779052735, "max": 827.2373046875, "pos_frac": 0.765625, "sample": [-285.6056213378906, -149.56814575195312, 827.2373046875, -92.64522552490234, -248.92567443847656, 4.18855094909668, -64.66487884521484, 303.65960693359375, 36.186641693115234, 679.16357421875, 233.01406860351562, -130.93438720703125, 130.72665405273438, 319.20294189453125, 301.81103515625, 803.587158203125, 505.2646179199219, 114.10803985595703, 11.96860122680664, 396.1366882324219, 296.76141357421875, 100.70384216308594, 94.24984741210938, 266.0636901855469, 201.30953979492188, 175.69232177734375, -511.66937255859375, 261.2530517578125, 179.01821899414062, 129.2945556640625, 220.23556518554688, 310.3756103515625, 119.48711395263672, 126.32215881347656, 175.02444458007812, 82.43550872802734, 336.5901184082031, 79.0167465209961, -157.44740295410156, 385.96868896484375, 526.3587036132812, -205.4956512451172, 688.552001953125, 23.923995971679688, 471.0660095214844, 204.07366943359375, 390.37847900390625, -426.0792541503906, 0.16925430297851562, 558.1821899414062, 156.95559692382812, 118.77693176269531, -662.96337890625, 79.56620025634766, 139.29205322265625, -197.33425903320312, -130.51356506347656, 96.19963836669922, 101.08325958251953, 555.2603759765625, 156.28854370117188, -55.449989318847656, 108.30172729492188, -38.08171844482422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000369.npy"}
|
|
{"epoch": 0.5418502202643172, "step": 370, "batch_size": 64, "mean": 152.9559783935547, "std": 291.0430603027344, "min": -433.74688720703125, "p10": -215.42227630615233, "median": 161.16058349609375, "p90": 556.5637268066407, "max": 893.702392578125, "pos_frac": 0.71875, "sample": [54.237220764160156, 285.5188293457031, 260.65606689453125, -419.1605224609375, 645.0167846679688, 493.43182373046875, 108.47846221923828, 290.08154296875, 22.402921676635742, -125.2692642211914, 424.57012939453125, 530.538330078125, 42.593414306640625, 142.477294921875, -118.87397766113281, 629.771728515625, 228.70355224609375, 237.686767578125, 220.65756225585938, 893.702392578125, 810.390380859375, 280.69342041015625, 270.2837219238281, -170.90170288085938, 281.6763610839844, -86.24906158447266, 5.54644775390625, 320.41522216796875, -207.80203247070312, 175.35382080078125, 567.7174682617188, 186.546142578125, 37.55046081542969, 305.74725341796875, -23.846599578857422, 415.9362487792969, 239.83053588867188, -179.92437744140625, -378.461181640625, 29.110410690307617, 158.70303344726562, 229.35595703125, -218.68809509277344, 96.69729614257812, 441.02667236328125, 95.56207275390625, 101.84771728515625, 297.3499755859375, 227.02854919433594, -433.74688720703125, -82.80216979980469, -48.95989990234375, 102.35260009765625, 163.61813354492188, -303.7436218261719, -278.3916320800781, 10.509513854980469, -315.88629150390625, -58.45071029663086, 526.8826904296875, 579.6236572265625, 257.056640625, -57.06146240234375, 572.464599609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000370.npy"}
|
|
{"epoch": 0.5433186490455213, "step": 371, "batch_size": 64, "mean": 211.5328369140625, "std": 305.36083984375, "min": -555.2072143554688, "p10": -116.16462173461913, "median": 167.8176040649414, "p90": 653.924353027344, "max": 933.1671752929688, "pos_frac": 0.75, "sample": [346.250732421875, 259.5673522949219, 498.0272521972656, 72.07393646240234, -555.2072143554688, 141.86264038085938, 531.0560913085938, 166.09884643554688, 874.1651000976562, 335.9478759765625, 504.84503173828125, 21.13686180114746, 322.4808349609375, 223.8604736328125, -312.0935363769531, 86.06819152832031, -4.474884033203125, 196.91891479492188, 494.1829833984375, 44.21563720703125, -31.761945724487305, -238.06861877441406, 692.5279541015625, -118.78401184082031, -200.08135986328125, 159.22474670410156, 907.5377807617188, 702.7280883789062, -236.08607482910156, 8.502531051635742, 678.140380859375, -42.24803161621094, 374.8764343261719, 294.50872802734375, -97.20763397216797, -94.85304260253906, 597.4202880859375, -110.0527114868164, 256.35064697265625, 473.9091796875, -67.22891235351562, 117.15918731689453, 714.5341796875, 77.8843765258789, 13.97610092163086, 253.83773803710938, 294.8247985839844, 435.20526123046875, 507.27685546875, -29.218521118164062, 426.38427734375, 139.53533935546875, 100.86036682128906, 336.9678955078125, 169.53636169433594, 169.662109375, 128.4042205810547, 67.53379821777344, -21.789291381835938, 471.9393310546875, 241.49432373046875, 933.1671752929688, -173.4009246826172, 5.988777160644531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000371.npy"}
|
|
{"epoch": 0.5447870778267254, "step": 372, "batch_size": 64, "mean": 246.97903442382812, "std": 337.7304382324219, "min": -424.3111572265625, "p10": -146.55095672607422, "median": 154.3909912109375, "p90": 738.8876586914063, "max": 1316.199951171875, "pos_frac": 0.765625, "sample": [332.16314697265625, 340.65228271484375, 695.8573608398438, -300.8648681640625, 23.531936645507812, 729.140380859375, 552.672607421875, 231.71347045898438, 321.4162292480469, -19.083343505859375, 1316.199951171875, 361.48876953125, -5.62115478515625, 156.1480712890625, -102.48989868164062, 104.89221954345703, 60.32175064086914, 66.32422637939453, 763.3995361328125, -199.8675537109375, 210.0777130126953, 139.76348876953125, 669.210205078125, -144.20852661132812, 519.6446533203125, 130.93893432617188, 87.95455169677734, 149.08169555664062, 81.362060546875, 152.6339111328125, 115.98556518554688, 357.5380554199219, 431.65240478515625, -158.84750366210938, 147.08131408691406, -122.7125015258789, 378.8746032714844, 743.0650634765625, 58.81598663330078, 1071.9991455078125, 518.5758056640625, 398.9105224609375, 152.63279724121094, -237.83456420898438, 381.7634582519531, 516.9227294921875, 774.5266723632812, 253.82591247558594, -12.285263061523438, 381.85638427734375, 106.98904418945312, 265.2073974609375, 112.49971008300781, -51.816673278808594, 780.9595947265625, 179.16319274902344, -2.717531204223633, 150.5467529296875, -147.5548553466797, 873.7578125, -241.63223266601562, -424.3111572265625, 449.50482177734375, 179.26174926757812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000372.npy"}
|
|
{"epoch": 0.5462555066079295, "step": 373, "batch_size": 64, "mean": 239.539794921875, "std": 280.9273681640625, "min": -569.7874755859375, "p10": -60.84494857788085, "median": 224.2378692626953, "p90": 617.1910034179688, "max": 842.0826416015625, "pos_frac": 0.8125, "sample": [148.48028564453125, -161.5550079345703, -209.5584716796875, 268.25860595703125, 17.884904861450195, 842.0826416015625, 415.62982177734375, 137.8546600341797, 296.3531799316406, 174.55300903320312, 587.5840454101562, 249.33876037597656, -53.999542236328125, 264.33416748046875, 390.0818786621094, 40.98224639892578, 29.317211151123047, 716.9075317382812, 331.2644348144531, -63.77869415283203, -357.323486328125, -93.70651245117188, 207.97409057617188, -569.7874755859375, 140.43475341796875, 56.261993408203125, 179.35226440429688, 139.72393798828125, 621.82177734375, 369.2856140136719, 223.4581756591797, 448.71246337890625, 320.81512451171875, 531.4951782226562, 451.5916442871094, 72.31501770019531, -171.80752563476562, 606.3858642578125, 317.604248046875, -18.075851440429688, 830.1809692382812, 145.97137451171875, 403.73040771484375, 215.73394775390625, 346.7018737792969, 443.5205078125, 320.5823669433594, 225.01756286621094, 564.9942016601562, 788.8724365234375, 255.0555419921875, 21.375244140625, 102.2073974609375, 157.05511474609375, -3.6223297119140625, 285.19854736328125, 112.88209533691406, 100.08724975585938, 771.4614868164062, 360.57940673828125, 378.1553039550781, 651.5628051757812, -36.31791687011719, -8.985969543457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000373.npy"}
|
|
{"epoch": 0.5477239353891337, "step": 374, "batch_size": 64, "mean": 215.2000732421875, "std": 357.7436218261719, "min": -800.4982299804688, "p10": -130.60262298583982, "median": 142.58795928955078, "p90": 522.6642333984375, "max": 1285.9747314453125, "pos_frac": 0.765625, "sample": [-138.56842041015625, 123.9933090209961, -86.25550842285156, 1285.9747314453125, 380.581787109375, 1120.2998046875, 330.144775390625, 615.24609375, 426.4881591796875, 32.271202087402344, 139.38992309570312, 352.0225524902344, 31.68145751953125, 212.49322509765625, 65.58114624023438, -50.14698028564453, 485.97540283203125, 100.97293090820312, 506.99810791015625, 345.8737487792969, -84.6602783203125, 57.89022445678711, 401.067626953125, 39.83393859863281, -31.177188873291016, 960.3485107421875, 46.004554748535156, 81.87705993652344, -175.93296813964844, 347.17108154296875, 280.2792053222656, 279.7377014160156, -170.62576293945312, 973.1715698242188, -510.129638671875, 106.71318054199219, 21.822189331054688, 515.5440063476562, 61.63823699951172, -38.474853515625, 145.78599548339844, 120.73694610595703, 80.22272491455078, 87.3311996459961, 194.48744201660156, 296.1213073730469, -154.3786163330078, 477.0317687988281, 301.47039794921875, 445.86700439453125, 104.50938415527344, 525.7157592773438, -112.01576232910156, 395.7195129394531, 297.3232727050781, -70.46562957763672, -154.14169311523438, 180.77789306640625, -2.4892425537109375, 156.88121032714844, 1189.4637451171875, -800.4982299804688, 172.9630126953125, 451.2691955566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000374.npy"}
|
|
{"epoch": 0.5491923641703378, "step": 375, "batch_size": 64, "mean": 197.35800170898438, "std": 318.6018981933594, "min": -742.9003295898438, "p10": -198.37783813476562, "median": 196.18629455566406, "p90": 575.442645263672, "max": 989.7745361328125, "pos_frac": 0.765625, "sample": [644.92236328125, 381.7682800292969, -34.844970703125, 263.0864562988281, -121.28376770019531, -208.70379638671875, 474.021240234375, 219.6011962890625, 346.5732421875, -87.31590270996094, 73.29485321044922, -77.12983703613281, 64.58394622802734, -742.9003295898438, 130.8294677734375, 907.92578125, 345.8332214355469, 48.791282653808594, 316.66949462890625, 106.73107147216797, 25.838897705078125, 260.6560974121094, -478.86541748046875, 40.76294708251953, 442.144775390625, -190.89878845214844, 586.7169799804688, 70.7008056640625, 138.24041748046875, 2.6904067993164062, -398.65264892578125, 1.9223785400390625, 549.1358642578125, 598.9091186523438, 49.28837966918945, 252.79074096679688, 333.8683776855469, 499.48724365234375, 481.5753173828125, 654.9127807617188, 415.3006896972656, 188.49533081054688, 126.7633285522461, -286.2242736816406, 453.56231689453125, 475.2049865722656, 295.9607849121094, -201.58314514160156, 989.7745361328125, 521.5700073242188, 403.5876770019531, -230.16897583007812, -2.373260498046875, 251.61361694335938, 126.15567016601562, 521.9385375976562, 91.42835998535156, 591.9611206054688, -95.8761215209961, 203.87725830078125, 444.7010498046875, 374.0519714355469, 89.37429809570312, -91.86109924316406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000375.npy"}
|
|
{"epoch": 0.5506607929515418, "step": 376, "batch_size": 64, "mean": 256.3907470703125, "std": 320.7525634765625, "min": -536.20947265625, "p10": -144.7107452392578, "median": 227.43343353271484, "p90": 663.0728820800781, "max": 852.4658813476562, "pos_frac": 0.78125, "sample": [-219.16653442382812, 750.3350830078125, 177.38418579101562, 54.93210220336914, 852.4658813476562, 103.36863708496094, 461.65216064453125, -252.7411346435547, -18.343276977539062, 20.366928100585938, 545.508056640625, 0.30138206481933594, 640.9308471679688, 105.79151916503906, -327.6172790527344, 454.45465087890625, 543.8006591796875, -195.89816284179688, -122.95065307617188, 156.2429962158203, 86.86228942871094, 28.506853103637695, -154.0364990234375, 798.7743530273438, 751.39306640625, 38.78874969482422, 449.91754150390625, 257.0645751953125, 548.6778564453125, -73.50935363769531, 754.3651733398438, 533.1175537109375, 134.41969299316406, 590.368896484375, 196.1209716796875, 572.006103515625, 528.2178344726562, 253.40042114257812, 418.56915283203125, 235.95761108398438, 182.23289489746094, 484.3288269042969, -63.83122253417969, 237.69091796875, 567.586669921875, 158.91453552246094, 838.6912841796875, -62.969688415527344, -164.0808563232422, 109.21009063720703, 24.548690795898438, -85.0010757446289, 659.9598388671875, 664.4070434570312, 279.5654296875, 609.5604248046875, 227.99717712402344, 417.034423828125, 226.86968994140625, 278.1396484375, -46.975303649902344, 146.5572967529297, 574.98046875, -536.20947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000376.npy"}
|
|
{"epoch": 0.5521292217327459, "step": 377, "batch_size": 64, "mean": 251.91156005859375, "std": 305.2445068359375, "min": -438.8543701171875, "p10": -76.87665405273435, "median": 253.83515167236328, "p90": 676.254052734375, "max": 984.6080322265625, "pos_frac": 0.765625, "sample": [371.28680419921875, 984.6080322265625, 134.66473388671875, -83.46438598632812, 295.8882141113281, -38.184539794921875, 220.38442993164062, -278.4700927734375, 270.5880432128906, 93.51764678955078, 805.5348510742188, 757.53759765625, -26.517173767089844, 893.4033203125, -26.666345596313477, 214.66580200195312, 149.9685516357422, 477.4300842285156, 260.2167663574219, -31.758527755737305, 258.8414306640625, 590.8539428710938, 550.41650390625, -23.074947357177734, 585.4483642578125, 879.5789184570312, 362.1751403808594, -273.1805419921875, 697.3792724609375, 113.58182525634766, 332.47259521484375, 248.82887268066406, 25.53191375732422, 146.43966674804688, 22.611263275146484, 424.0797119140625, 142.8241424560547, 317.157470703125, 449.67218017578125, 658.1840209960938, 312.8846130371094, -61.505279541015625, -438.8543701171875, 229.10595703125, -144.81723022460938, 81.97876739501953, 554.957763671875, 341.26727294921875, -213.6072235107422, 337.3668518066406, 12.299545288085938, 683.9983520507812, -25.211318969726562, 55.126708984375, -14.674873352050781, 558.3873901367188, 279.82794189453125, 585.1397705078125, 357.09228515625, 10.346855163574219, 351.9781494140625, 84.40444946289062, 324.05615234375, -93.66472625732422], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000377.npy"}
|
|
{"epoch": 0.55359765051395, "step": 378, "batch_size": 64, "mean": 265.88409423828125, "std": 286.7349548339844, "min": -509.3359375, "p10": -68.63469619750977, "median": 276.4412841796875, "p90": 625.69287109375, "max": 1081.8577880859375, "pos_frac": 0.828125, "sample": [758.9210815429688, 368.8033752441406, 631.67822265625, 604.4568481445312, 478.08319091796875, 101.54261016845703, 197.93942260742188, 356.1951904296875, 134.085205078125, 298.2956237792969, 360.2818603515625, 410.106201171875, 634.3348388671875, 179.96949768066406, 115.14007568359375, -31.381393432617188, 291.57122802734375, 660.9367065429688, 242.037109375, 420.95086669921875, 308.706298828125, -68.67501831054688, -157.397705078125, 58.901161193847656, 268.833251953125, 205.03366088867188, -90.11311340332031, 467.69830322265625, 142.37814331054688, 394.4112548828125, 534.6387329101562, 414.4008483886719, 307.4994812011719, 70.74880981445312, -19.05602264404297, 526.5549926757812, 230.9556884765625, 1081.8577880859375, 40.03248596191406, -328.67694091796875, 286.9138488769531, -46.56267547607422, 230.82273864746094, 611.72705078125, 130.79855346679688, -104.28360748291016, 476.7385559082031, 310.14801025390625, 493.9311218261719, 337.5266418457031, 74.18086242675781, 284.04931640625, 495.76690673828125, 242.3724365234375, -509.3359375, 186.7725067138672, 260.9400329589844, 443.7025451660156, -68.54061126708984, 834.6303100585938, 102.79631042480469, -392.09210205078125, 68.79300689697266, 662.1060791015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000378.npy"}
|
|
{"epoch": 0.5550660792951542, "step": 379, "batch_size": 64, "mean": 264.1388854980469, "std": 325.6307067871094, "min": -465.83197021484375, "p10": -96.35971145629883, "median": 209.9394073486328, "p90": 692.6810424804688, "max": 1391.26513671875, "pos_frac": 0.78125, "sample": [300.00390625, 408.379150390625, -12.425994873046875, 207.9559326171875, -95.94969940185547, 124.97016143798828, 181.84707641601562, 669.07373046875, 199.38027954101562, 184.61697387695312, 362.6517639160156, 841.654541015625, 34.77909851074219, 515.7347412109375, 363.1591796875, 210.78915405273438, -465.83197021484375, 901.2714233398438, 17.003158569335938, 236.12933349609375, 510.0704345703125, 455.80548095703125, 275.59124755859375, -38.90537643432617, 126.34454345703125, -81.6488037109375, -310.67047119140625, 173.79910278320312, -35.41655731201172, 433.6729736328125, 733.0685424804688, 462.74749755859375, -274.14190673828125, 180.8690643310547, 507.9159240722656, 723.519287109375, 366.76898193359375, 387.37677001953125, 702.7984619140625, 146.2224578857422, -78.28862762451172, 533.588623046875, 209.08966064453125, 35.76393508911133, 433.784912109375, -96.53543090820312, 186.74526977539062, 413.0562438964844, 234.4065704345703, 158.14878845214844, -132.68064880371094, 388.4658203125, 838.8356323242188, 251.25697326660156, 1391.26513671875, 360.17230224609375, 41.53862762451172, 546.9879760742188, 151.2036895751953, -207.48577880859375, 152.72035217285156, 661.1583251953125, -176.55667114257812, -22.732101440429688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000379.npy"}
|
|
{"epoch": 0.5565345080763583, "step": 380, "batch_size": 64, "mean": 288.2654724121094, "std": 326.82598876953125, "min": -830.8529052734375, "p10": -67.94820404052733, "median": 310.5505065917969, "p90": 676.8174194335938, "max": 1179.8302001953125, "pos_frac": 0.859375, "sample": [137.63204956054688, 112.71217346191406, 67.41777038574219, 1179.8302001953125, 146.56536865234375, 110.0942611694336, 148.33041381835938, 388.64105224609375, 401.63653564453125, 310.7828369140625, 128.33721923828125, 425.2465515136719, -51.54131317138672, 591.9373168945312, 871.130859375, 503.17822265625, 386.8517150878906, 486.56256103515625, 514.6534423828125, 177.96136474609375, 355.17584228515625, -305.19757080078125, 192.64068603515625, 296.3555908203125, 73.91122436523438, -74.97972869873047, 434.0621337890625, 320.4134216308594, 287.1896667480469, 687.2745361328125, 422.61370849609375, 622.5206298828125, 581.8954467773438, 652.41748046875, 642.1536865234375, 428.791748046875, 90.67427062988281, -47.84027099609375, 270.4036560058594, 69.26708221435547, 913.6405029296875, 81.29536437988281, -279.2655944824219, 181.48712158203125, -200.92041015625, 221.90489196777344, 357.51922607421875, 693.28662109375, 64.10670471191406, -347.5338134765625, 429.09613037109375, 60.348968505859375, 748.7452392578125, 349.16693115234375, 310.31817626953125, -134.01388549804688, 463.6331787109375, 362.8774108886719, 215.10296630859375, 204.33770751953125, -830.8529052734375, 333.267333984375, 790.751708984375, 422.9879150390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000380.npy"}
|
|
{"epoch": 0.5580029368575624, "step": 381, "batch_size": 64, "mean": 204.197998046875, "std": 355.566162109375, "min": -759.5465087890625, "p10": -163.83786315917968, "median": 183.30817413330078, "p90": 628.0960021972659, "max": 1170.199462890625, "pos_frac": 0.71875, "sample": [267.7632751464844, -37.620941162109375, -19.732267379760742, 654.8807983398438, 490.45928955078125, 678.2053833007812, 522.4910278320312, -39.12345886230469, 133.56985473632812, 549.1517333984375, 470.1584777832031, 108.32426452636719, 407.1675109863281, 242.62353515625, 1062.4990234375, -145.175048828125, 484.1966552734375, 366.35504150390625, 695.33837890625, 332.5325622558594, 1029.758056640625, 674.533935546875, -196.5928955078125, -21.906082153320312, -171.83621215820312, 124.52423095703125, 553.0634155273438, 30.492656707763672, -353.1637268066406, -43.602882385253906, 182.1049041748047, 291.393310546875, 50.46543884277344, -34.87697982788086, 265.89898681640625, -138.9693145751953, 264.14501953125, 163.34585571289062, 181.57400512695312, -77.05972290039062, 63.82610321044922, 102.193603515625, 1170.199462890625, 214.8003692626953, 431.2032470703125, 392.4212951660156, 11.90625, -205.58544921875, 565.59814453125, -126.49333190917969, 49.458335876464844, 363.5232238769531, -108.34732055664062, 227.53468322753906, 55.807899475097656, -307.0890808105469, 95.54632568359375, 329.8258972167969, 436.92059326171875, 184.51144409179688, 225.64979553222656, -759.5465087890625, -690.7086181640625, 348.15771484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000381.npy"}
|
|
{"epoch": 0.5594713656387665, "step": 382, "batch_size": 64, "mean": 245.88180541992188, "std": 272.1447448730469, "min": -331.8670654296875, "p10": -93.93155288696288, "median": 204.46183013916016, "p90": 601.6862182617188, "max": 963.4157104492188, "pos_frac": 0.84375, "sample": [963.4157104492188, 607.2066650390625, 120.6460189819336, 71.33797454833984, 155.63050842285156, 821.901123046875, -101.66545867919922, 110.16600799560547, 338.6314697265625, -178.53836059570312, 160.2736053466797, 236.4810028076172, 395.96405029296875, 478.356689453125, 581.0757446289062, 566.8243408203125, 210.73362731933594, 43.03114318847656, 409.5172424316406, 117.88632202148438, 421.3005676269531, -118.06303405761719, 14.952835083007812, 360.3147277832031, 736.6861572265625, -171.74130249023438, 351.1578063964844, -75.88577270507812, 552.2406005859375, 238.57586669921875, 639.3739013671875, -110.63140106201172, -25.782894134521484, 354.0559997558594, 506.5981140136719, -331.8670654296875, -198.59060668945312, 385.8797607421875, 14.032424926757812, 246.03488159179688, 202.62069702148438, 185.19766235351562, 306.81988525390625, 51.471519470214844, 217.7249298095703, 99.68357849121094, 108.03248596191406, 312.0988464355469, 206.30296325683594, 119.54493713378906, 632.739990234375, 575.3240356445312, 309.48443603515625, 15.102380752563477, 130.0694580078125, 769.1205444335938, 71.2919921875, -0.4917640686035156, 535.1785278320312, 588.80517578125, 146.21690368652344, 129.2138671875, 5.1582489013671875, 122.2069091796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000382.npy"}
|
|
{"epoch": 0.5609397944199707, "step": 383, "batch_size": 64, "mean": 239.28997802734375, "std": 356.1913757324219, "min": -935.57177734375, "p10": -174.63472747802734, "median": 240.76715087890625, "p90": 751.4206054687501, "max": 962.0069580078125, "pos_frac": 0.765625, "sample": [-23.910179138183594, -89.62164306640625, 163.42568969726562, -324.8317565917969, 511.05523681640625, -260.9442138671875, -207.661865234375, 616.404296875, 330.5643310546875, 1.5330085754394531, 392.255615234375, 149.50851440429688, 61.959529876708984, 0.3099212646484375, -374.4586181640625, 270.5746154785156, 962.0069580078125, 43.72760772705078, 241.67471313476562, 364.373046875, 815.6807861328125, -222.25051879882812, -8.431880950927734, 730.1802368164062, 229.26275634765625, -52.496437072753906, 757.2769775390625, 93.55726623535156, -31.098876953125, 299.97552490234375, 949.9242553710938, 266.51519775390625, 742.7630615234375, 244.8358154296875, 419.67242431640625, 406.68310546875, 430.0615539550781, 59.86460876464844, 181.3731689453125, 294.17401123046875, -97.67245483398438, -54.8834228515625, 239.85958862304688, 41.15047073364258, 355.0691223144531, 820.8465576171875, -174.41806030273438, 581.8446655273438, 4.4910430908203125, 315.30224609375, 820.6790771484375, 77.71988677978516, -174.7275848388672, 314.477294921875, 411.7486572265625, 42.620643615722656, 755.1309814453125, 573.1361083984375, 236.4354248046875, 567.3438110351562, -935.57177734375, 87.60476684570312, 481.8531494140625, 589.050537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000383.npy"}
|
|
{"epoch": 0.5624082232011748, "step": 384, "batch_size": 64, "mean": 194.8409423828125, "std": 250.0452117919922, "min": -390.89239501953125, "p10": -110.0501998901367, "median": 207.04146575927734, "p90": 488.29281005859383, "max": 946.6728515625, "pos_frac": 0.796875, "sample": [195.46139526367188, 8.314117431640625, 946.6728515625, -182.4512176513672, 3.085205078125, 134.9896697998047, -221.08665466308594, -32.58988952636719, 732.9297485351562, 214.68679809570312, 468.2068176269531, 503.23443603515625, 255.3107147216797, -344.2304382324219, 408.2916259765625, 29.67208480834961, -120.76716613769531, 41.3591423034668, 356.08245849609375, 384.61865234375, 155.49710083007812, 294.81829833984375, 42.34794616699219, 365.44873046875, 270.7313537597656, 292.837890625, 143.71417236328125, -210.70828247070312, -390.89239501953125, 102.25794219970703, 343.4402160644531, 123.63044738769531, 48.626312255859375, 217.77728271484375, 191.16624450683594, -59.185218811035156, 420.88787841796875, 358.0942687988281, 370.2193603515625, 65.19598388671875, -21.321552276611328, 199.39613342285156, 369.6617126464844, 541.5026245117188, 496.9010925292969, 287.2274169921875, 274.332763671875, 273.5565185546875, 249.54837036132812, -187.251953125, 238.952880859375, -26.57256317138672, 73.95833587646484, 463.296630859375, 123.46157836914062, -82.5894775390625, -85.0439453125, 532.400634765625, 417.2725830078125, 132.77989196777344, 598.7017211914062, 353.1678771972656, 36.419036865234375, 282.366943359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000384.npy"}
|
|
{"epoch": 0.5638766519823789, "step": 385, "batch_size": 64, "mean": 233.38693237304688, "std": 249.66159057617188, "min": -395.346435546875, "p10": -45.68745040893554, "median": 201.6787567138672, "p90": 561.3955810546876, "max": 790.6387329101562, "pos_frac": 0.875, "sample": [-35.9866943359375, 50.04065704345703, -49.84491729736328, 269.65179443359375, 444.49676513671875, 247.49160766601562, 389.65594482421875, 121.83370971679688, -212.56289672851562, 489.638427734375, 148.81422424316406, 31.250349044799805, 161.13858032226562, 128.69061279296875, 449.50616455078125, 198.2142333984375, 637.3533935546875, 239.51512145996094, 112.3484115600586, 93.32593536376953, 354.0978088378906, 165.22340393066406, -160.9844512939453, 346.6339111328125, -222.35455322265625, 299.613525390625, 159.84776306152344, 549.4388427734375, 99.45637512207031, 47.85675048828125, 354.80731201171875, 192.76377868652344, 178.363037109375, 734.1473999023438, 790.6387329101562, 139.82777404785156, 172.12258911132812, 3.8562164306640625, 566.5198974609375, 307.841796875, 47.60954284667969, 107.94642639160156, 511.8928527832031, 187.5693817138672, 210.9666748046875, 649.0439453125, 451.1913146972656, -137.30233764648438, 759.35546875, 373.3707275390625, -395.346435546875, 64.98158264160156, 250.99151611328125, -273.93707275390625, 209.29002380371094, 334.8035888671875, 478.3038635253906, 114.1561050415039, 312.9514465332031, 697.7413330078125, 78.61653137207031, 368.81976318359375, 205.14328002929688, 334.315673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000385.npy"}
|
|
{"epoch": 0.5653450807635829, "step": 386, "batch_size": 64, "mean": 256.977783203125, "std": 332.35858154296875, "min": -441.12030029296875, "p10": -142.90402832031245, "median": 194.27964782714844, "p90": 762.1091552734376, "max": 1050.802490234375, "pos_frac": 0.828125, "sample": [-85.96598815917969, 389.3719787597656, -67.53392791748047, 393.29913330078125, 778.0849609375, 180.1208038330078, 86.1889419555664, 423.0665283203125, 141.31625366210938, 476.77117919921875, 345.9722595214844, 1050.802490234375, -328.1644287109375, -60.492095947265625, 799.60693359375, 351.28851318359375, 353.45928955078125, 455.6652526855469, 12.10899543762207, 101.79090118408203, 93.6606674194336, 270.2020263671875, 282.5445251464844, 166.45172119140625, 293.6976013183594, 257.3354187011719, 96.5241470336914, -188.19754028320312, 988.3212280273438, 663.35595703125, -62.499969482421875, 88.26177978515625, 75.2567138671875, -265.9537658691406, 148.8030242919922, 807.63525390625, 985.9887084960938, 364.84234619140625, 724.832275390625, 117.67972564697266, 240.0390625, 633.91748046875, -271.32275390625, 145.6806182861328, 44.549530029296875, 166.07723999023438, 208.43849182128906, 159.81729125976562, -167.30604553222656, 434.4698181152344, -441.12030029296875, 927.2131958007812, 80.42264556884766, 76.6303939819336, 56.2603759765625, 128.83094787597656, 484.57720947265625, 319.95050048828125, -351.399658203125, 556.394287109375, 592.2927856445312, 274.1081237792969, 269.1438903808594, 173.41250610351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000386.npy"}
|
|
{"epoch": 0.566813509544787, "step": 387, "batch_size": 64, "mean": 223.16220092773438, "std": 277.20703125, "min": -415.60443115234375, "p10": -125.57430496215818, "median": 200.62606048583984, "p90": 572.686492919922, "max": 920.0958862304688, "pos_frac": 0.796875, "sample": [320.4808044433594, -102.3714828491211, 159.2031707763672, 161.80149841308594, 351.3814392089844, 920.0958862304688, 200.61776733398438, 130.00244140625, 309.0351867675781, 539.6424560546875, -393.27276611328125, 316.4507141113281, 106.0692138671875, 32.8003044128418, -415.60443115234375, 218.92654418945312, 702.80224609375, 549.5098876953125, -140.06704711914062, 225.53097534179688, 775.7488403320312, 67.25498962402344, 334.4585876464844, 778.3779296875, 455.8293762207031, 176.54129028320312, 259.6824645996094, 150.69534301757812, 154.52670288085938, 173.08154296875, 65.74857330322266, 154.49842834472656, 466.1177062988281, 46.1493034362793, 600.66455078125, 161.091796875, 418.1322021484375, 135.68846130371094, 301.3185119628906, 382.9356384277344, 94.74959564208984, -63.80622482299805, 625.6415405273438, 459.8956604003906, 349.1202087402344, 55.30950927734375, 416.2869567871094, -2.775035858154297, 582.6193237304688, -57.96864318847656, 384.974609375, -211.24789428710938, 28.249168395996094, 343.4234313964844, -221.45278930664062, 200.6343536376953, -10.812324523925781, 464.55401611328125, -135.51837158203125, -29.720535278320312, 253.60887145996094, -299.1853332519531, 279.1785888671875, 525.0457763671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000387.npy"}
|
|
{"epoch": 0.5682819383259912, "step": 388, "batch_size": 64, "mean": 288.08319091796875, "std": 361.1645202636719, "min": -620.8928833007812, "p10": -66.18918991088867, "median": 222.38197326660156, "p90": 704.141638183594, "max": 1600.2000732421875, "pos_frac": 0.828125, "sample": [1600.2000732421875, 637.204345703125, -67.89064025878906, 538.1802978515625, 537.2846069335938, 851.2844848632812, 168.79359436035156, -288.7144775390625, 164.4122314453125, 287.64044189453125, 171.98239135742188, 944.0479736328125, 614.6039428710938, 231.81985473632812, -620.8928833007812, 2.6026687622070312, 334.9822998046875, 254.53399658203125, 515.5960693359375, -62.219139099121094, 107.76592254638672, 783.4288330078125, 172.81332397460938, -51.02704620361328, 163.2014617919922, 209.5059814453125, 82.53258514404297, -123.13441467285156, 161.54296875, 433.2308654785156, 67.76421356201172, 105.86503601074219, 267.86724853515625, 77.24874877929688, 490.3431396484375, 182.06988525390625, 114.39884948730469, 61.251197814941406, 610.8781127929688, 103.4202651977539, 423.3474426269531, 121.43870544433594, 431.56903076171875, 167.98577880859375, 212.944091796875, -1.8365402221679688, 442.2259521484375, 234.20843505859375, -268.8436584472656, 411.5143127441406, 339.1917724609375, 1288.683349609375, -206.2298126220703, 379.484619140625, 234.0583038330078, 26.03350830078125, -26.699310302734375, 545.5624389648438, 723.2786865234375, 819.8009643554688, 659.488525390625, 520.65283203125, 257.190673828125, -134.14659118652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000388.npy"}
|
|
{"epoch": 0.5697503671071953, "step": 389, "batch_size": 64, "mean": 234.0470733642578, "std": 321.1650390625, "min": -415.9674072265625, "p10": -142.9644607543945, "median": 242.99939727783203, "p90": 583.9732910156251, "max": 1017.09130859375, "pos_frac": 0.71875, "sample": [64.74459838867188, 496.23809814453125, 395.0863952636719, -363.6294250488281, 184.00291442871094, 268.59771728515625, 176.3941192626953, 563.1586303710938, 486.3382568359375, 226.2794189453125, 334.9101867675781, -111.31558990478516, 257.6596984863281, -206.54098510742188, 143.44508361816406, 523.990234375, 124.3941421508789, 444.27093505859375, 428.5052795410156, -86.71237182617188, -350.17620849609375, -0.19015121459960938, 592.8938598632812, -18.996646881103516, 311.0339050292969, 707.4581909179688, -415.9674072265625, -27.180191040039062, 106.3836898803711, 150.3654022216797, 272.09027099609375, -149.53097534179688, 139.70765686035156, 337.6756591796875, -328.6693115234375, 371.4110107421875, 228.33909606933594, 477.8174743652344, 407.1841735839844, 984.257568359375, 450.07257080078125, -39.95747375488281, 439.0603332519531, 460.7601318359375, 187.85552978515625, 396.6429748535156, -5.861268997192383, 112.08723449707031, 458.13897705078125, 125.7160415649414, 83.10767364501953, 1017.09130859375, -127.64259338378906, -98.91387176513672, 257.7123107910156, 821.9443359375, -75.1278076171875, 942.0886840820312, 278.302734375, -35.56754684448242, 331.6452331542969, -243.4842071533203, 293.663818359375, 803.9529418945312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000389.npy"}
|
|
{"epoch": 0.5712187958883994, "step": 390, "batch_size": 64, "mean": 237.1005096435547, "std": 384.1810607910156, "min": -661.1967163085938, "p10": -163.56034851074216, "median": 234.66807556152344, "p90": 568.610302734375, "max": 2227.20654296875, "pos_frac": 0.765625, "sample": [235.59776306152344, 313.41650390625, 588.50146484375, 501.1356201171875, -661.1967163085938, -175.48880004882812, -260.3409729003906, 125.94908142089844, 385.61773681640625, 280.3222351074219, 503.2070617675781, 52.28645324707031, 307.31976318359375, 414.7024841308594, 224.33657836914062, -7.818023681640625, -11.099235534667969, 2227.20654296875, -132.80093383789062, 434.38958740234375, 100.30770874023438, 525.2811279296875, 569.2423095703125, 152.5724639892578, 153.4369354248047, -135.727294921875, 106.00252532958984, -48.529449462890625, 157.91697692871094, 270.4439697265625, 175.91177368164062, -20.175933837890625, -183.728759765625, -74.8221206665039, 160.0256805419922, 567.1356201171875, 833.9620361328125, 573.7869873046875, -379.79400634765625, 413.8204040527344, 557.9120483398438, 628.0159301757812, 416.9516906738281, 102.27556610107422, 585.16259765625, 94.78486633300781, 177.262451171875, 105.42430114746094, 543.426025390625, 416.05194091796875, 459.4875183105469, 233.73838806152344, 328.6294860839844, 259.98358154296875, 369.75445556640625, -35.715599060058594, 0.131134033203125, -308.1558837890625, -510.9963684082031, 309.28118896484375, 459.56402587890625, 45.505916595458984, 284.35882568359375, 389.2851257324219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000390.npy"}
|
|
{"epoch": 0.5726872246696035, "step": 391, "batch_size": 64, "mean": 142.0626983642578, "std": 275.3839416503906, "min": -601.9620971679688, "p10": -146.12131042480468, "median": 108.77741241455078, "p90": 463.11053161621106, "max": 970.65625, "pos_frac": 0.734375, "sample": [2.246623992919922, 11.315353393554688, 62.124473571777344, -46.24449157714844, 178.26040649414062, 340.3180236816406, 36.127174377441406, 115.42313385009766, 15.451221466064453, 399.4897155761719, -122.6679916381836, 87.66883850097656, -226.4140625, -89.51079559326172, 67.48332214355469, 36.279808044433594, 392.75634765625, 130.00439453125, 34.511390686035156, -133.725341796875, 207.38267517089844, 506.8817443847656, 562.119140625, 475.3741760253906, -93.16059112548828, 124.85411071777344, 970.65625, -184.7589874267578, -48.611602783203125, 305.6964111328125, 371.62945556640625, -111.70635223388672, 83.80369567871094, 41.71536636352539, -76.47423553466797, 204.1413116455078, 102.1316909790039, -22.00566864013672, 434.495361328125, 162.7822265625, 24.00811004638672, -151.43386840820312, 148.47540283203125, 333.5442810058594, 730.635009765625, -377.5182800292969, 535.2811279296875, -23.981985092163086, -395.444580078125, 37.703514099121094, 56.70661926269531, 253.60177612304688, -189.17288208007812, 385.8101501464844, 406.070068359375, 118.71784973144531, 756.6785888671875, 406.1004638671875, 309.68438720703125, -601.9620971679688, 247.55337524414062, 231.43374633789062, 293.31488037109375, 248.36341857910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000391.npy"}
|
|
{"epoch": 0.5741556534508077, "step": 392, "batch_size": 64, "mean": 272.79132080078125, "std": 261.2071228027344, "min": -155.08804321289062, "p10": -25.227830886840792, "median": 226.2935562133789, "p90": 603.1115051269531, "max": 1025.0677490234375, "pos_frac": 0.890625, "sample": [43.54315948486328, 172.61041259765625, -151.08993530273438, 59.624935150146484, 227.95802307128906, -38.151466369628906, 603.9874267578125, 255.77650451660156, 277.3010559082031, 339.34423828125, 601.0676879882812, -136.6637420654297, 301.7765197753906, 175.46389770507812, 146.7299346923828, 32.7333984375, 609.7012939453125, 760.5413818359375, 480.7523193359375, 201.20079040527344, -98.83733367919922, 345.85015869140625, 553.2160034179688, 50.09642028808594, 359.0274658203125, 400.6717529296875, 224.62908935546875, 1025.0677490234375, -100.60733032226562, 217.55111694335938, 219.44102478027344, 251.216552734375, 383.99169921875, 174.17782592773438, 473.60760498046875, 232.13133239746094, 342.21490478515625, 181.35377502441406, 184.1898956298828, 133.9834442138672, 445.47705078125, 428.23675537109375, 85.46444702148438, 69.52552032470703, 505.33880615234375, 852.1044921875, 377.96441650390625, 444.3976135253906, 382.1094970703125, -144.13926696777344, 129.08851623535156, 165.68980407714844, 84.62326049804688, 312.69757080078125, 168.67420959472656, 146.9866485595703, 329.7337646484375, 4.927318572998047, 989.420166015625, 28.730789184570312, -155.08804321289062, 81.38232421875, 425.9094543457031, 782.20703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000392.npy"}
|
|
{"epoch": 0.5756240822320118, "step": 393, "batch_size": 64, "mean": 209.79046630859375, "std": 318.2052001953125, "min": -266.19390869140625, "p10": -191.43768615722655, "median": 220.97095489501953, "p90": 589.4853637695314, "max": 1424.402099609375, "pos_frac": 0.71875, "sample": [272.81292724609375, 632.2771606445312, 5.628658294677734, -83.01807403564453, 357.55194091796875, 459.87725830078125, -202.10873413085938, 370.16119384765625, 263.3341979980469, 241.65176391601562, 62.74285888671875, 232.1048583984375, 123.3459243774414, 331.94195556640625, 270.2259521484375, 73.25707244873047, 973.5447998046875, 941.5052490234375, 613.4619140625, -7.800117492675781, 285.6928405761719, 547.495361328125, -245.9365234375, -224.96629333496094, -100.59904479980469, -6.695888519287109, -192.6373748779297, 220.61648559570312, 297.0244445800781, 310.8282470703125, 181.84417724609375, 416.4805908203125, 468.97943115234375, 217.63571166992188, 512.8466186523438, 279.13140869140625, 222.303955078125, 105.2236099243164, 421.815185546875, -32.38398742675781, 1.7895660400390625, 119.41690063476562, 607.4810791015625, -134.26974487304688, -197.69039916992188, 504.2044677734375, 314.6400451660156, -192.4483642578125, 31.43364715576172, 147.31515502929688, 111.63208770751953, 221.32542419433594, -154.27529907226562, -132.1727294921875, -189.07943725585938, 356.1396179199219, 370.58416748046875, -266.19390869140625, 307.6173095703125, 82.26879119873047, -92.65154266357422, 1424.402099609375, 633.4541015625, -65.53057861328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000393.npy"}
|
|
{"epoch": 0.5770925110132159, "step": 394, "batch_size": 64, "mean": 220.0300750732422, "std": 279.3369445800781, "min": -283.2396545410156, "p10": -119.54056167602538, "median": 166.6559600830078, "p90": 603.4959228515626, "max": 997.23095703125, "pos_frac": 0.78125, "sample": [121.77400207519531, 312.6904296875, -200.65841674804688, 418.3432922363281, 163.9783935546875, -127.93109130859375, 106.28221130371094, 136.5944366455078, 599.4949340820312, 57.40777587890625, 559.6004638671875, -20.18885040283203, -283.2396545410156, 26.801315307617188, -33.30567932128906, -176.82012939453125, -103.7497329711914, 680.6771850585938, 539.7657470703125, -184.1975555419922, 519.538818359375, -212.8466796875, 274.6436767578125, 303.6988830566406, 171.35597229003906, 304.5992431640625, 218.1228790283203, 47.25502014160156, -19.910423278808594, 605.2106323242188, 540.7787475585938, 689.1190795898438, -126.30805969238281, 419.20513916015625, 129.44810485839844, 46.776214599609375, 151.047119140625, 21.52904510498047, 132.8372802734375, 617.5804443359375, 30.31977081298828, 454.7571716308594, -1.8397941589355469, 169.8067169189453, 141.27085876464844, 32.0997314453125, 524.1776123046875, 789.16015625, 241.43821716308594, 157.21820068359375, 475.4366455078125, 289.43402099609375, 997.23095703125, 843.6415405273438, 324.718994140625, 35.971397399902344, 51.20634460449219, -33.84627914428711, 169.33352661132812, 373.7952575683594, 201.45281982421875, -56.7652587890625, 173.95562744140625, 270.9505310058594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000394.npy"}
|
|
{"epoch": 0.57856093979442, "step": 395, "batch_size": 64, "mean": 239.2242431640625, "std": 309.171630859375, "min": -496.8615417480469, "p10": -83.36632385253905, "median": 203.2417449951172, "p90": 689.4072692871096, "max": 955.0869140625, "pos_frac": 0.78125, "sample": [492.2329406738281, -194.84674072265625, 163.91448974609375, 72.1448974609375, 214.9586639404297, 341.23846435546875, 338.322509765625, 237.6673583984375, -248.53067016601562, 274.5289611816406, -5.4856109619140625, 514.899169921875, 390.8076171875, -46.295860290527344, 106.9984359741211, 27.391429901123047, 657.9913940429688, 321.1263427734375, 865.7005615234375, -36.77483367919922, 614.5010375976562, 42.936744689941406, 497.70977783203125, -92.36564636230469, 212.46871948242188, 28.78472900390625, 205.90353393554688, 376.21002197265625, -33.988548278808594, 17.303916931152344, 862.2958984375, 917.8936157226562, 410.7999267578125, 138.84095764160156, 347.21734619140625, 39.47296142578125, 160.3421630859375, 280.86090087890625, 872.210693359375, 152.65338134765625, 481.8140869140625, 470.8057556152344, -496.8615417480469, -84.79853820800781, -217.27529907226562, 313.0880126953125, -53.46519470214844, 761.2088012695312, 12.927011489868164, -80.02449035644531, 64.73150634765625, -165.7947998046875, 22.002920150756836, 955.0869140625, 702.8712158203125, 244.3605499267578, 200.5799560546875, 458.45025634765625, 174.00123596191406, 434.0527648925781, 449.6715393066406, 66.36009979248047, 94.3532485961914, -37.836734771728516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000395.npy"}
|
|
{"epoch": 0.580029368575624, "step": 396, "batch_size": 64, "mean": 219.32781982421875, "std": 238.59107971191406, "min": -385.80340576171875, "p10": -33.53731002807613, "median": 173.92937469482422, "p90": 548.0099395751954, "max": 972.5863037109375, "pos_frac": 0.890625, "sample": [221.22178649902344, 563.4695434570312, 69.59371185302734, 196.71597290039062, 101.94422912597656, 568.208984375, 142.50531005859375, 179.254638671875, 142.8275146484375, 218.24453735351562, 335.66156005859375, 260.2418212890625, 224.21694946289062, 361.16015625, -125.13238525390625, 173.61204528808594, 188.97442626953125, 56.10508728027344, 59.82801818847656, 164.55889892578125, 124.2289810180664, 412.3569030761719, 829.9434204101562, 308.79779052734375, 195.74935913085938, 442.4659729003906, 263.0675048828125, 135.9444580078125, 159.42535400390625, 657.6717529296875, -81.50898742675781, 344.84051513671875, 112.3075180053711, 255.26522827148438, 63.43888473510742, 161.3639373779297, 415.448974609375, -159.6043701171875, 395.888671875, 636.5933837890625, 356.9255676269531, 13.72113037109375, -94.135498046875, 511.9375305175781, 42.5462646484375, -52.869754791259766, 438.17547607421875, 11.571723937988281, 70.11196899414062, 217.09796142578125, 776.5877075195312, 119.97377014160156, 74.2930908203125, -104.04922485351562, 67.22673034667969, 272.7848205566406, 311.2088317871094, 125.5271224975586, 77.33509063720703, -385.80340576171875, 113.91558837890625, 972.5863037109375, 174.2467041015625, 149.16726684570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000396.npy"}
|
|
{"epoch": 0.5814977973568282, "step": 397, "batch_size": 64, "mean": 277.65472412109375, "std": 267.56640625, "min": -291.74761962890625, "p10": -8.544419479370106, "median": 228.4979248046875, "p90": 626.6746704101563, "max": 922.9066162109375, "pos_frac": 0.890625, "sample": [34.36093521118164, 521.9442749023438, 38.470672607421875, 361.8022155761719, 141.19589233398438, 92.23374938964844, 230.91424560546875, 264.61737060546875, 448.14605712890625, 174.9214324951172, 632.9766845703125, 3.6406097412109375, 561.9362182617188, -49.19415283203125, 146.69161987304688, 459.3058166503906, 226.08160400390625, -179.24594116210938, 409.037109375, 588.9052124023438, 253.13446044921875, 611.969970703125, 307.137939453125, 405.0778503417969, 922.9066162109375, 53.06037902832031, 652.018310546875, 440.05126953125, -143.5919189453125, 143.6556854248047, 540.2466430664062, 217.1309356689453, 93.5123291015625, 784.2882080078125, 488.450439453125, 252.60684204101562, 19.523944854736328, 342.09906005859375, 165.913330078125, 168.21392822265625, -291.74761962890625, -127.5208969116211, 88.94615936279297, -193.47308349609375, 568.180908203125, 182.76100158691406, 42.50189971923828, 157.36541748046875, -13.76657485961914, 498.943603515625, 51.06888961791992, 675.9547119140625, 205.34292602539062, 922.0821533203125, 168.43963623046875, 805.9459228515625, 115.57734680175781, 211.94305419921875, 518.434814453125, 164.94158935546875, 232.5841064453125, 325.66607666015625, 362.2184143066406, 271.36444091796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000397.npy"}
|
|
{"epoch": 0.5829662261380323, "step": 398, "batch_size": 64, "mean": 246.69529724121094, "std": 314.204833984375, "min": -380.7127990722656, "p10": -54.72208938598632, "median": 137.99402618408203, "p90": 593.7578735351562, "max": 1188.8519287109375, "pos_frac": 0.78125, "sample": [126.97991943359375, 60.82121276855469, 391.6595764160156, 699.5371704101562, -104.58863830566406, 91.21802520751953, 316.66656494140625, -290.75079345703125, 63.37730407714844, -25.29448699951172, 553.7286987304688, -318.49920654296875, 559.5128784179688, 391.90838623046875, 446.8818054199219, 967.131103515625, 163.61570739746094, 126.91735076904297, 557.7762451171875, 283.58148193359375, 479.079833984375, 129.61480712890625, 593.3515625, 405.7499084472656, 1054.3330078125, -10.92138671875, 171.62738037109375, 131.64088439941406, 584.9846801757812, 371.70062255859375, 33.736328125, 170.47982788085938, -66.51806640625, -5.8603668212890625, 248.55349731445312, 94.86683654785156, 42.315757751464844, 346.1251220703125, 60.194679260253906, 378.69793701171875, 116.99586486816406, 430.0843811035156, 129.69375610351562, 48.742408752441406, 133.94241333007812, -29.12482452392578, 387.77691650390625, -30.089385986328125, -51.75459289550781, 101.72140502929688, 142.04563903808594, 1188.8519287109375, 593.9320068359375, 750.4918212890625, -380.7127990722656, 491.0898132324219, 420.94305419921875, 479.7835388183594, 729.832275390625, 117.60028076171875, -50.00975799560547, 7.13818359375, -55.993873596191406, -160.41497802734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000398.npy"}
|
|
{"epoch": 0.5844346549192364, "step": 399, "batch_size": 64, "mean": 234.14266967773438, "std": 300.6256103515625, "min": -384.1827392578125, "p10": -186.7093444824218, "median": 213.98451232910156, "p90": 614.1228271484376, "max": 1017.738037109375, "pos_frac": 0.75, "sample": [177.08413696289062, 281.86651611328125, 94.01847076416016, -219.06549072265625, 586.3829345703125, -65.67317962646484, 626.0113525390625, 42.69950485229492, 200.3450927734375, 8.503570556640625, 247.88229370117188, -245.03445434570312, 639.4328002929688, -51.22540283203125, 553.9163818359375, 190.4430694580078, 350.06610107421875, -130.17477416992188, -19.67706871032715, 53.95802688598633, 666.8758544921875, 174.28883361816406, 435.4332275390625, 228.56492614746094, 525.40576171875, 731.900390625, 197.8099365234375, 407.14764404296875, -253.9033660888672, 227.62393188476562, -14.870773315429688, 358.8004455566406, 231.66519165039062, 154.61398315429688, 431.2527770996094, -210.93844604492188, 1017.738037109375, 508.7999572753906, 577.92919921875, -238.2652130126953, 411.5655212402344, 490.8896789550781, 189.9995880126953, -384.1827392578125, -34.932247161865234, 395.8345947265625, 69.352783203125, 657.3112182617188, 98.31133270263672, -110.37228393554688, 425.3397521972656, 885.214111328125, 462.8818359375, 188.10618591308594, 559.4597778320312, 261.76678466796875, 179.93161010742188, 270.0162658691406, -300.9745178222656, 265.2753601074219, 480.34173583984375, -74.91801452636719, 193.37884521484375, -74.09920501708984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000399.npy"}
|
|
{"epoch": 0.5859030837004405, "step": 400, "batch_size": 64, "mean": 330.199951171875, "std": 287.2948303222656, "min": -422.671875, "p10": -50.32951965332031, "median": 312.2267303466797, "p90": 681.0405700683594, "max": 1103.0087890625, "pos_frac": 0.828125, "sample": [266.1824645996094, 433.49029541015625, 385.14556884765625, 242.00137329101562, 597.3814697265625, -51.251190185546875, 429.7236328125, 236.70697021484375, -67.49529266357422, 561.7078247070312, 182.09725952148438, 147.06939697265625, 493.2684020996094, 250.59959411621094, -422.671875, 661.9140014648438, 703.9367065429688, 221.66954040527344, 316.279052734375, 409.8841552734375, 782.1991577148438, 540.3095092773438, 145.13882446289062, 395.5696105957031, 132.1061248779297, 286.37908935546875, 115.68772888183594, 534.3336181640625, 322.23095703125, 382.82427978515625, 364.1266174316406, 149.29315185546875, -92.65424346923828, 356.687255859375, 308.1744079589844, -195.59104919433594, 34.018089294433594, 731.3153076171875, 330.1536560058594, 502.31610107421875, -22.606521606445312, 246.2520751953125, 298.6690979003906, 984.3052978515625, -52.60882568359375, 558.3208618164062, 297.3296203613281, 187.2488555908203, -10.084117889404297, 1103.0087890625, 607.2607421875, -44.919525146484375, 285.680908203125, 463.05767822265625, 291.59735107421875, 620.0782470703125, -161.8167266845703, 689.2376708984375, 602.22998046875, 458.5211181640625, 267.25775146484375, 633.1085205078125, 757.5899658203125, -48.178955078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000400.npy"}
|
|
{"epoch": 0.5873715124816447, "step": 401, "batch_size": 64, "mean": 185.60690307617188, "std": 362.07354736328125, "min": -759.6788940429688, "p10": -187.3199981689453, "median": 119.85631561279297, "p90": 728.4331420898437, "max": 1052.100830078125, "pos_frac": 0.703125, "sample": [122.42433166503906, 426.596923828125, -62.97471237182617, 216.0941162109375, 549.6589965820312, 33.079673767089844, -382.8280029296875, -372.5995788574219, -759.6788940429688, -123.93833923339844, 25.74974822998047, 211.75418090820312, -192.460693359375, 73.7049331665039, -26.60601806640625, -17.280065536499023, 375.5141906738281, 263.4794006347656, 117.28829956054688, 107.59681701660156, -75.62680053710938, -151.66636657714844, 6.271903991699219, 927.0657958984375, 7.782079696655273, 582.6846923828125, 829.1704711914062, 421.3172607421875, 731.7002563476562, 214.52793884277344, 89.80944061279297, 287.0821228027344, -59.993892669677734, 67.96411895751953, -55.857643127441406, -438.7889099121094, 900.8348388671875, -38.05317306518555, 221.17112731933594, 530.0728759765625, 197.7008819580078, 554.561767578125, 319.2820129394531, -55.761356353759766, 277.12615966796875, 155.14358520507812, 177.30287170410156, -131.10472106933594, 1052.100830078125, -393.03240966796875, 666.6944580078125, 458.70672607421875, 310.3629150390625, 68.30330657958984, 845.7947998046875, 806.5533447265625, 39.290992736816406, 312.3817443847656, 81.28176879882812, 51.27889633178711, 186.2821044921875, 720.8098754882812, -228.93685913085938, -175.32504272460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000401.npy"}
|
|
{"epoch": 0.5888399412628488, "step": 402, "batch_size": 64, "mean": 344.6689758300781, "std": 306.7999267578125, "min": -296.12060546875, "p10": 23.73477172851563, "median": 298.8147277832031, "p90": 797.8014831542971, "max": 1425.2257080078125, "pos_frac": 0.90625, "sample": [420.5735168457031, 28.736204147338867, -17.554786682128906, 452.4129638671875, 242.6776123046875, 429.0967102050781, -4.72607421875, 226.59725952148438, -51.62267303466797, 586.3504638671875, 399.68841552734375, 558.722900390625, 649.3762817382812, 275.2235412597656, 58.99591064453125, 570.4627685546875, 21.898910522460938, 306.55987548828125, 213.17431640625, 404.1646423339844, 563.8169555664062, 265.22454833984375, 574.1791381835938, -296.12060546875, 330.0476989746094, 468.42340087890625, 44.45904541015625, 920.3260498046875, 38.99903869628906, 839.4384765625, 179.30062866210938, 382.14288330078125, 193.6550750732422, 441.13165283203125, 133.06329345703125, 333.2005310058594, 817.2056274414062, 123.04043579101562, 186.1825408935547, 665.9972534179688, 28.018447875976562, -70.45692443847656, 872.0255737304688, 900.0133056640625, 282.9833068847656, 521.8359375, 752.525146484375, 144.9830780029297, 290.2745666503906, 1425.2257080078125, 424.8388671875, 549.3941650390625, 50.52943420410156, 876.1983642578125, 183.79244995117188, 310.57464599609375, 338.43634033203125, 257.53131103515625, 142.15664672851562, -286.4552307128906, 374.86248779296875, 202.05667114257812, 221.87844848632812, 291.069580078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000402.npy"}
|
|
{"epoch": 0.5903083700440529, "step": 403, "batch_size": 64, "mean": 266.7139892578125, "std": 303.91729736328125, "min": -344.39691162109375, "p10": -107.18658981323242, "median": 249.41559600830078, "p90": 675.3305480957033, "max": 1097.064453125, "pos_frac": 0.796875, "sample": [254.99627685546875, 75.6183853149414, 81.96903228759766, 217.09954833984375, 399.9902648925781, 60.11825942993164, 256.87298583984375, 391.14019775390625, -100.42557525634766, -201.4083709716797, 164.11697387695312, -26.006744384765625, -344.39691162109375, 558.2152099609375, 206.11500549316406, 863.1846923828125, 243.7853546142578, 25.6674861907959, 438.600341796875, 243.8349151611328, 368.31207275390625, 69.91729736328125, 701.457763671875, -58.566322326660156, -171.73092651367188, 576.7864379882812, 494.40814208984375, 697.8551025390625, 258.1300964355469, 993.5582275390625, -111.47496032714844, 426.9736633300781, 596.25732421875, 38.45321273803711, 10.942680358886719, 534.8706665039062, -122.90435028076172, 285.4770812988281, 622.7732543945312, 495.95904541015625, -178.63389587402344, 306.1585388183594, 1097.064453125, -1.8028507232666016, 495.5356140136719, 122.40370178222656, 320.5196838378906, 733.6937255859375, 341.7314147949219, 952.96923828125, 306.7456970214844, 390.7463684082031, 319.8320617675781, 146.27015686035156, -48.961639404296875, 44.23976135253906, -31.799020767211914, 351.87762451171875, 154.4459686279297, 114.37399291992188, 347.5402526855469, 204.52667236328125, -110.08416748046875, 173.75833129882812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000403.npy"}
|
|
{"epoch": 0.591776798825257, "step": 404, "batch_size": 64, "mean": 262.11383056640625, "std": 349.90411376953125, "min": -547.3900756835938, "p10": -194.50329437255854, "median": 262.8769836425781, "p90": 713.6739379882814, "max": 1174.9381103515625, "pos_frac": 0.75, "sample": [-289.57489013671875, 303.5926208496094, 43.114585876464844, 634.7318115234375, 578.815185546875, 164.4515380859375, 168.14723205566406, 946.4501953125, 166.0474853515625, 729.5732421875, 1174.9381103515625, 222.4044952392578, 284.213623046875, 811.1722412109375, -85.42841339111328, -352.2374572753906, 828.262451171875, 108.59941864013672, -282.0796203613281, 188.7718048095703, -115.96145629882812, 20.17969512939453, -80.64131164550781, 426.56610107421875, 251.59893798828125, 300.7406311035156, -215.86093139648438, 434.6244201660156, 274.155029296875, 404.3627624511719, 548.6298217773438, -240.34132385253906, 422.14788818359375, 156.90109252929688, 563.3414306640625, 126.54180145263672, -144.66880798339844, 175.9204559326172, 310.0307312011719, 569.3364868164062, -547.3900756835938, 418.331298828125, 69.882080078125, 644.8351440429688, 201.51576232910156, -272.658203125, -110.46705627441406, 482.3351745605469, 604.56689453125, -75.33769226074219, 436.7607727050781, -3.3356094360351562, 401.87353515625, 386.0853576660156, 518.75390625, 760.6259155273438, 325.4029235839844, 56.01600646972656, -8.400720596313477, -80.18984985351562, 903.4926147460938, 110.07897186279297, 676.5755615234375, 344.36309814453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000404.npy"}
|
|
{"epoch": 0.593245227606461, "step": 405, "batch_size": 64, "mean": 207.8790283203125, "std": 378.0, "min": -528.8992919921875, "p10": -257.65058593749995, "median": 148.39952087402344, "p90": 770.0597290039065, "max": 1127.365478515625, "pos_frac": 0.6875, "sample": [823.5910034179688, 787.7806396484375, 246.33984375, -208.8866729736328, -434.9656982421875, -41.004425048828125, -465.8363037109375, 132.90298461914062, 413.5960998535156, -268.49151611328125, 52.837623596191406, 1025.86962890625, 547.3926391601562, 126.5279541015625, 53.985816955566406, 1127.365478515625, -14.568300247192383, -63.078895568847656, 51.16436767578125, -147.9488983154297, 67.6180191040039, 185.18202209472656, 109.37562561035156, 521.1520385742188, 163.89605712890625, 108.6697769165039, -134.2729034423828, 492.8330993652344, -347.22283935546875, -74.40596771240234, 419.20556640625, 99.43994140625, 434.0411071777344, -11.554819107055664, 408.498291015625, 77.34473419189453, 290.9015197753906, -268.0087890625, -133.18746948242188, 78.71817016601562, 458.2021179199219, 235.26296997070312, 714.9805908203125, 360.2840881347656, -36.65719985961914, 183.0227508544922, 567.869140625, -208.22933959960938, 245.57931518554688, 276.85296630859375, 374.61761474609375, -434.98443603515625, 286.28594970703125, 877.662109375, 114.1159439086914, -528.8992919921875, 485.490966796875, 404.01947021484375, -50.3726806640625, 924.742919921875, 819.9360961914062, -233.4814453125, 506.45025634765625, 728.7109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000405.npy"}
|
|
{"epoch": 0.5947136563876652, "step": 406, "batch_size": 64, "mean": 286.5443115234375, "std": 348.5628662109375, "min": -380.06597900390625, "p10": -122.05048370361325, "median": 225.94449615478516, "p90": 751.2952148437502, "max": 1239.347900390625, "pos_frac": 0.78125, "sample": [524.7979125976562, 188.17953491210938, 288.9114685058594, 55.172119140625, 157.88934326171875, 241.5570068359375, 163.02761840820312, -340.94891357421875, 1211.2938232421875, 207.0816650390625, 439.19854736328125, 270.006103515625, 1239.347900390625, 584.3969116210938, 299.58538818359375, -6.299060821533203, 560.3114624023438, 302.7090148925781, 609.2334594726562, -141.7648468017578, 890.8314208984375, 348.5958557128906, 336.1812744140625, -149.2030792236328, 99.21472930908203, 168.60890197753906, 71.11128234863281, -96.83511352539062, 518.1046752929688, -1.568450927734375, -132.85707092285156, 224.8101348876953, 549.9912109375, -296.4940185546875, -195.285400390625, 517.2726440429688, 784.6986083984375, 227.078857421875, 173.1481475830078, -14.457263946533203, 137.77700805664062, 208.82925415039062, 469.5520324707031, -38.976932525634766, -12.553802490234375, 66.7713394165039, 770.4906005859375, 345.9555358886719, 35.3843994140625, 224.2529296875, 639.0303344726562, 706.5059814453125, 427.1767883300781, 37.84965515136719, -380.06597900390625, 10.044998168945312, 811.0794677734375, 567.0906372070312, 1067.723876953125, 380.0668640136719, 279.1964111328125, 626.3270874023438, 96.90946960449219, -14.216453552246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000406.npy"}
|
|
{"epoch": 0.5961820851688693, "step": 407, "batch_size": 64, "mean": 172.09341430664062, "std": 333.45184326171875, "min": -889.6163330078125, "p10": -229.21056213378907, "median": 163.82671356201172, "p90": 616.2310363769532, "max": 872.138671875, "pos_frac": 0.734375, "sample": [-289.0836181640625, 544.32861328125, 521.444580078125, -347.5446472167969, -295.2379150390625, -76.7998275756836, 255.88462829589844, -175.30661010742188, -330.8525695800781, 261.72857666015625, 369.33489990234375, 97.03123474121094, 391.9783935546875, 710.37548828125, 684.7861328125, 14.807878494262695, -127.19046020507812, 494.80438232421875, 85.03052520751953, -110.68345642089844, 375.8236083984375, 128.60589599609375, 45.53028869628906, 87.82162475585938, 100.23289489746094, 503.6856689453125, 215.18870544433594, 254.279296875, 90.28865051269531, 206.38441467285156, -232.16213989257812, 171.63296508789062, 14.108901977539062, -101.78900909423828, 27.230552673339844, 156.0204620361328, 488.7734069824219, 737.0816040039062, 78.74031066894531, -889.6163330078125, 738.1088256835938, 329.63128662109375, 432.96966552734375, 13.515602111816406, 424.55291748046875, 210.20875549316406, 209.8179931640625, 872.138671875, 400.2421569824219, 323.54608154296875, 625.6902465820312, -222.32354736328125, -127.66893005371094, 594.1595458984375, -39.86042022705078, 89.99238586425781, 128.6365203857422, 505.7142028808594, -206.5877227783203, -398.05938720703125, 642.595458984375, 250.5893096923828, -204.0049591064453, 283.675537109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000407.npy"}
|
|
{"epoch": 0.5976505139500734, "step": 408, "batch_size": 64, "mean": 206.1318359375, "std": 272.343505859375, "min": -338.236572265625, "p10": -157.5821823120117, "median": 216.36167907714844, "p90": 609.567810058594, "max": 776.9927368164062, "pos_frac": 0.78125, "sample": [84.62394714355469, 703.1080932617188, 776.9927368164062, 0.616455078125, 399.52044677734375, 362.8232727050781, -107.38909912109375, 145.51040649414062, -141.54168701171875, -164.45668029785156, -20.278364181518555, -282.6334533691406, 114.19596099853516, 12.8433837890625, -47.784873962402344, -338.236572265625, 91.25199890136719, 313.54107666015625, 384.90478515625, 60.38975524902344, 176.39674377441406, 382.33880615234375, -120.80078125, 414.00653076171875, -130.2196502685547, 259.1346435546875, 106.81061553955078, 253.08816528320312, 80.19170379638672, 699.5315551757812, 327.50665283203125, 210.16452026367188, 545.2916259765625, -15.470603942871094, -274.0296630859375, -229.23477172851562, 267.2834777832031, 236.8773651123047, 64.73954772949219, 212.65914916992188, 382.5459899902344, 444.9490661621094, 373.41839599609375, 451.61663818359375, 194.28778076171875, -241.41986083984375, 711.8436279296875, 35.67156219482422, 11.598407745361328, 93.08016967773438, 220.064208984375, 642.2205200195312, 271.1863098144531, -168.25479125976562, 364.98419189453125, 308.4371643066406, 73.01642608642578, 475.44580078125, 226.9364471435547, 308.38470458984375, 707.1353149414062, 637.11474609375, 494.150390625, 359.7564697265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000408.npy"}
|
|
{"epoch": 0.5991189427312775, "step": 409, "batch_size": 64, "mean": 200.32015991210938, "std": 282.85504150390625, "min": -443.8636169433594, "p10": -92.58041305541992, "median": 170.23763275146484, "p90": 579.9234008789064, "max": 913.8094482421875, "pos_frac": 0.75, "sample": [-38.632293701171875, -322.543701171875, 274.4745788574219, 221.98643493652344, 200.2401123046875, 364.53826904296875, -85.8919448852539, -95.56964111328125, -12.119232177734375, 180.4687957763672, 523.4789428710938, 554.2958984375, -292.35003662109375, 160.10333251953125, 406.572265625, 144.11611938476562, 219.23846435546875, 279.1930847167969, 181.53927612304688, 757.5123291015625, 331.2365417480469, -91.37105560302734, 357.40362548828125, -12.915241241455078, 657.685546875, -160.02105712890625, -4.372077941894531, 52.73236846923828, -7.029632568359375, 153.40907287597656, 323.4896240234375, 741.8665161132812, 499.2967529296875, -443.8636169433594, 78.36566925048828, 913.8094482421875, -3.0776596069335938, 97.75459289550781, -336.0921630859375, 95.77820587158203, 517.72314453125, 10.432411193847656, 590.9066162109375, 36.8195915222168, 249.25640869140625, 256.25079345703125, 80.35540008544922, 129.45753479003906, 815.0452270507812, 400.4791564941406, -16.479074478149414, 180.37193298339844, -93.09870910644531, 199.50755310058594, 185.97572326660156, 449.45440673828125, 360.0773620605469, 314.737060546875, 130.02340698242188, 101.53306579589844, 787.330322265625, 24.055435180664062, 94.88348388671875, 150.65509033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000409.npy"}
|
|
{"epoch": 0.6005873715124816, "step": 410, "batch_size": 64, "mean": 236.67666625976562, "std": 284.5502624511719, "min": -554.3952026367188, "p10": -95.41731109619138, "median": 255.5237045288086, "p90": 599.3366516113282, "max": 846.8779296875, "pos_frac": 0.828125, "sample": [219.21405029296875, 261.59271240234375, -123.21365356445312, 60.957679748535156, 486.1907958984375, 335.4609069824219, 537.9090576171875, 440.4433288574219, 516.8099975585938, 19.46406364440918, 116.64741516113281, 235.87646484375, 12.022994995117188, 397.3802490234375, -107.72013092041016, -128.26202392578125, 618.853515625, -396.49029541015625, -47.74454116821289, 395.3350830078125, 452.0826416015625, 750.93603515625, -38.85719299316406, 95.71650695800781, 609.1461791992188, 695.9754028320312, 373.19708251953125, 68.425537109375, 338.69366455078125, 846.8779296875, 305.48565673828125, 29.267169952392578, 347.4608459472656, 33.72705078125, 428.7637939453125, 7.959747314453125, -66.71073150634766, 7.732263565063477, 166.960205078125, -235.8477783203125, 216.65956115722656, 509.9922790527344, 165.78729248046875, 340.5814208984375, 203.85214233398438, -554.3952026367188, 299.0588684082031, 253.42140197753906, 717.5150756835938, 347.405517578125, 109.17718505859375, 52.37397003173828, 414.45904541015625, 517.4411010742188, 257.6260070800781, 756.2919311523438, 189.8509063720703, 404.37481689453125, -334.7213134765625, 126.51445007324219, 264.8761901855469, -16.021371841430664, 576.44775390625, 261.0179138183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000410.npy"}
|
|
{"epoch": 0.6020558002936858, "step": 411, "batch_size": 64, "mean": 230.87094116210938, "std": 334.8406982421875, "min": -559.5396728515625, "p10": -69.69572219848632, "median": 166.80126190185547, "p90": 643.3861938476563, "max": 1348.868408203125, "pos_frac": 0.734375, "sample": [247.93914794921875, 8.993461608886719, -218.20684814453125, 112.8073959350586, 517.9063720703125, 226.08367919921875, 15.516849517822266, 241.46728515625, 397.76220703125, 418.194091796875, 125.6522445678711, -29.55076026916504, 196.52557373046875, -44.37661361694336, 68.7391586303711, -20.070802688598633, 464.3623962402344, 259.5745544433594, 92.83438110351562, 417.0675048828125, 895.1936645507812, 335.4056701660156, -146.99542236328125, -22.482606887817383, 171.14181518554688, 158.88026428222656, -59.33580017089844, 230.52561950683594, 103.71353149414062, 1036.479248046875, 129.76205444335938, 449.0039367675781, 879.9249877929688, -74.13568878173828, 505.91827392578125, -0.5345611572265625, 368.61083984375, 588.5142822265625, 739.2559204101562, 153.70510864257812, 35.24175262451172, 336.0103759765625, 140.7845458984375, 281.8592529296875, 159.104248046875, 358.63446044921875, -19.956798553466797, -559.5396728515625, 1348.868408203125, 283.6556396484375, 162.46070861816406, 352.9821472167969, 588.283203125, 649.819091796875, 628.3760986328125, -188.62255859375, 11.281822204589844, -442.7227783203125, 172.366455078125, -32.282867431640625, 777.1708984375, -152.99041748046875, -7.2823638916015625, -49.53385543823242], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000411.npy"}
|
|
{"epoch": 0.6035242290748899, "step": 412, "batch_size": 64, "mean": 234.52589416503906, "std": 320.2996520996094, "min": -509.74853515625, "p10": -126.91835021972656, "median": 211.89244079589844, "p90": 607.8744567871095, "max": 1166.2406005859375, "pos_frac": 0.78125, "sample": [-95.91915130615234, 182.41453552246094, 300.14739990234375, 473.17059326171875, 321.71484375, -169.13931274414062, 723.2026977539062, -48.576942443847656, 210.1231689453125, 568.5206298828125, 135.40106201171875, 311.8865661621094, 67.3135986328125, 338.4691162109375, 0.9617748260498047, 368.2921142578125, 331.5546875, 123.5209732055664, -509.74853515625, 256.57977294921875, 36.72132110595703, 35.226417541503906, 630.2716064453125, 196.7877655029297, 30.583629608154297, 422.69158935546875, 18.721038818359375, 327.3212585449219, -198.19961547851562, 190.00198364257812, -56.783653259277344, 112.4975357055664, 213.66171264648438, 237.27557373046875, 143.8665313720703, -49.23210144042969, -308.5535888671875, -128.4796905517578, 173.36834716796875, 907.7093505859375, 21.655029296875, -505.4300537109375, 340.55181884765625, 552.0294189453125, 732.0068969726562, -123.27522277832031, 503.57373046875, 787.013427734375, -0.6942138671875, 559.515869140625, 342.5115966796875, -43.576446533203125, 103.44093322753906, 613.2656860351562, 564.1802978515625, 1166.2406005859375, 4.582668304443359, 451.83587646484375, 394.06243896484375, -225.88131713867188, 481.269775390625, 398.6999816894531, 595.294921875, 471.4361877441406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000412.npy"}
|
|
{"epoch": 0.604992657856094, "step": 413, "batch_size": 64, "mean": 238.4629364013672, "std": 313.2305908203125, "min": -821.4711303710938, "p10": -108.511351776123, "median": 247.18698120117188, "p90": 607.8877990722657, "max": 958.7742309570312, "pos_frac": 0.859375, "sample": [70.81484985351562, -2.752391815185547, 174.96270751953125, 405.7555847167969, 164.97100830078125, 147.4010467529297, 142.23486328125, 267.248779296875, 34.258155822753906, 225.52249145507812, 653.9488525390625, 924.8370361328125, -74.56287384033203, 275.27923583984375, 18.754310607910156, 5.81273078918457, 334.72967529296875, 111.78125, 1.8789825439453125, 888.9212646484375, 98.61985778808594, 419.2412109375, 378.3013000488281, -821.4711303710938, 76.4488525390625, 294.23681640625, 296.9761962890625, 227.12518310546875, 600.093505859375, 958.7742309570312, 374.22509765625, 350.9228210449219, 617.235595703125, 375.21490478515625, -123.06069946289062, 445.96832275390625, 326.5572509765625, 159.94647216796875, 282.9566345214844, 307.44317626953125, 143.15965270996094, 155.4206085205078, 526.2476806640625, -301.9345397949219, 611.2282104492188, 389.2705078125, 173.01205444335938, -262.8427429199219, -270.15185546875, -553.5607299804688, 221.9008331298828, 482.67791748046875, 476.80694580078125, 745.9276123046875, 28.206066131591797, 517.3670654296875, 328.8359069824219, 452.1640319824219, 206.05709838867188, 308.530029296875, 115.65640258789062, 209.81138610839844, 350.56536865234375, -210.2806396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000413.npy"}
|
|
{"epoch": 0.6064610866372981, "step": 414, "batch_size": 64, "mean": 256.247802734375, "std": 316.9881591796875, "min": -697.1141357421875, "p10": -60.89537353515624, "median": 254.1136016845703, "p90": 641.8205810546876, "max": 1013.56640625, "pos_frac": 0.8125, "sample": [338.4967041015625, 216.76280212402344, 496.3238525390625, 313.4790954589844, 23.80337905883789, 924.5217895507812, 172.78485107421875, -697.1141357421875, 29.263656616210938, 200.55615234375, 292.14312744140625, 161.31640625, 652.3599853515625, 855.8004150390625, 251.67367553710938, -79.87037658691406, 403.02801513671875, 70.06784057617188, -109.21566009521484, 153.8828887939453, 210.83941650390625, 528.0642700195312, -66.12223815917969, 145.91482543945312, 540.365966796875, 276.11724853515625, -228.27163696289062, -21.437273025512695, 367.60760498046875, 56.26289367675781, 500.7178649902344, 533.1609497070312, 20.418170928955078, 378.66064453125, 719.9299926757812, -38.368717193603516, 330.8829345703125, -48.69935607910156, -418.77752685546875, 274.1803894042969, 310.518310546875, 256.55352783203125, 612.33251953125, -199.58953857421875, 43.34855651855469, 529.7698974609375, 31.116668701171875, 617.2286376953125, 156.16830444335938, -39.7426643371582, 296.962158203125, 102.61003112792969, 153.283203125, 90.32408905029297, 270.1675109863281, 332.3358154296875, 790.1046142578125, -30.747955322265625, 441.56903076171875, 976.5620727539062, 1013.56640625, 407.2273254394531, 136.790283203125, 369.89080810546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000414.npy"}
|
|
{"epoch": 0.6079295154185022, "step": 415, "batch_size": 64, "mean": 190.50540161132812, "std": 293.50885009765625, "min": -763.3541870117188, "p10": -103.60505905151368, "median": 159.98695373535156, "p90": 606.3653808593751, "max": 894.903564453125, "pos_frac": 0.78125, "sample": [213.70382690429688, -102.88851928710938, 376.24981689453125, 800.130859375, 28.037612915039062, 763.6449584960938, 578.20849609375, 87.87960052490234, -9.169639587402344, 187.20765686035156, 512.217529296875, 383.2449951171875, 427.4273681640625, 533.115234375, -140.39297485351562, -74.83827209472656, 208.40994262695312, -245.60763549804688, 336.28533935546875, 149.11412048339844, 7.259010314941406, 37.4996337890625, 5.62969970703125, 618.4326171875, -149.34127807617188, 123.8633804321289, 627.6090087890625, -103.91214752197266, 95.14875030517578, 161.47698974609375, -32.1729736328125, 391.0691833496094, 229.19607543945312, 51.7449951171875, 894.903564453125, -318.2059326171875, -91.71784973144531, 113.90235900878906, 410.33489990234375, 651.162109375, 86.41851806640625, 363.0756530761719, 175.22076416015625, 85.06231689453125, 136.0499267578125, 219.4099578857422, 24.70049476623535, 217.30470275878906, 382.93133544921875, -100.65430450439453, 232.1282958984375, 445.5760803222656, 270.04833984375, 7.44122314453125, 358.8507080078125, -48.62339782714844, 158.49691772460938, 6.886383056640625, 509.214599609375, -263.7743225097656, 629.4111328125, 63.97907638549805, -763.3541870117188, 260.68316650390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000415.npy"}
|
|
{"epoch": 0.6093979441997063, "step": 416, "batch_size": 64, "mean": 315.087646484375, "std": 313.6957702636719, "min": -238.64993286132812, "p10": -55.38152046203613, "median": 271.4444885253906, "p90": 757.6973266601562, "max": 1028.4927978515625, "pos_frac": 0.84375, "sample": [848.2700805664062, 113.63660430908203, 161.18914794921875, 443.09228515625, -48.416343688964844, 584.75830078125, 157.0867919921875, 25.68834686279297, 460.28363037109375, 770.596435546875, -238.64993286132812, -216.19778442382812, 17.616552352905273, 273.7378234863281, 242.76766967773438, 530.4274291992188, 605.78857421875, 6.207847595214844, 590.2731323242188, 212.45408630371094, 403.71331787109375, 492.6459045410156, 43.90812301635742, 159.43136596679688, 757.482421875, 241.73672485351562, 285.8492431640625, 746.2385864257812, 150.92886352539062, 757.7894287109375, 201.28367614746094, 549.8897705078125, 968.6072998046875, -164.73797607421875, -134.04534912109375, 84.75316619873047, 574.2783813476562, 485.47503662109375, -22.06879425048828, 1028.4927978515625, 22.67601776123047, 899.4675903320312, 273.43157958984375, 269.4573974609375, 508.67974853515625, 629.9017333984375, 294.49761962890625, 644.4122314453125, 235.06846618652344, 107.50440216064453, 54.64139938354492, 215.1903839111328, -160.79440307617188, 786.2015991210938, 403.4244384765625, -48.67755126953125, 351.4315185546875, -58.2546501159668, 277.987060546875, -86.87547302246094, 60.821739196777344, 187.2962646484375, 436.34466552734375, 709.514404296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000416.npy"}
|
|
{"epoch": 0.6108663729809104, "step": 417, "batch_size": 64, "mean": 269.8897705078125, "std": 287.2241516113281, "min": -387.4964599609375, "p10": -93.15904235839842, "median": 244.02118682861328, "p90": 693.7992004394531, "max": 889.2012939453125, "pos_frac": 0.828125, "sample": [-5.623773574829102, 13.516311645507812, -15.501958847045898, 334.02935791015625, 9.873409271240234, 507.7548828125, 38.55427551269531, 416.47186279296875, -142.97329711914062, 694.0288696289062, 58.58957290649414, 626.251220703125, 391.7980651855469, 106.10185241699219, 239.8341522216797, -118.61936950683594, -107.67021179199219, 740.8314819335938, 671.9328002929688, 52.652374267578125, 629.7701416015625, 171.06820678710938, 864.9051513671875, 270.59942626953125, 815.7142944335938, 889.2012939453125, 219.98565673828125, 253.71951293945312, 114.5654525756836, 357.4626159667969, 131.61068725585938, 162.91831970214844, -163.21517944335938, 83.23169708251953, 399.34039306640625, 693.2633056640625, 31.20834732055664, 404.2015380859375, 443.9245910644531, 239.8086700439453, 130.75869750976562, 348.6584167480469, 248.02980041503906, 346.4839172363281, -153.5384979248047, 302.87286376953125, 83.76659393310547, 177.80947875976562, 493.812744140625, 246.00816345214844, 442.39453125, -387.4964599609375, 331.4983825683594, 394.5009460449219, -97.9016342163086, -82.0929946899414, 242.03421020507812, 117.31505584716797, 706.411376953125, 480.97314453125, 668.7597045898438, 80.67558288574219, 697.71533203125, -71.6208267211914], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000417.npy"}
|
|
{"epoch": 0.6123348017621145, "step": 418, "batch_size": 64, "mean": 268.3392028808594, "std": 395.8111572265625, "min": -416.396728515625, "p10": -190.13241729736325, "median": 180.15247344970703, "p90": 775.0614135742189, "max": 1389.4530029296875, "pos_frac": 0.75, "sample": [278.32476806640625, -161.4162139892578, 67.012939453125, -60.26816177368164, 74.98786926269531, 966.8666381835938, 67.20381927490234, 71.10675048828125, 733.07568359375, 623.1310424804688, -43.71417236328125, -40.404052734375, 557.7646484375, -134.9583740234375, 1273.2119140625, 590.7816162109375, 308.41766357421875, -111.82801818847656, 465.4379577636719, 5.8104248046875, -339.1910400390625, 793.0552978515625, -236.62918090820312, 548.3961181640625, 230.25576782226562, 25.609085083007812, 289.8881530761719, 659.8163452148438, 562.2503662109375, 212.62283325195312, 574.645751953125, 68.45112609863281, 117.98348236083984, 924.2542724609375, -416.396728515625, -232.5936279296875, -202.43936157226562, 382.98651123046875, 240.56809997558594, 533.6060791015625, 1389.4530029296875, -54.44910430908203, 100.08831787109375, 106.55734252929688, 313.108154296875, 147.68211364746094, 139.04345703125, 103.07723999023438, -338.9849853515625, -315.2327880859375, 371.70855712890625, 1.8387908935546875, 577.6204833984375, -40.04441833496094, 561.7546997070312, 266.8636169433594, 628.2733154296875, 1097.567138671875, 138.8753662109375, 47.63682174682617, 491.6875305175781, 941.4741821289062, 301.8420715332031, -71.4169921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000418.npy"}
|
|
{"epoch": 0.6138032305433186, "step": 419, "batch_size": 64, "mean": 306.9680480957031, "std": 346.4142761230469, "min": -503.8890075683594, "p10": -122.50778350830073, "median": 322.06036376953125, "p90": 716.2543579101563, "max": 1144.1309814453125, "pos_frac": 0.859375, "sample": [725.86083984375, -503.8890075683594, 365.2898864746094, 590.2022705078125, 59.84095001220703, 921.7814331054688, 81.23739624023438, -70.40882873535156, 26.69091796875, 719.9807739257812, 1144.1309814453125, 183.87637329101562, 345.888916015625, 568.8671875, 472.9674072265625, 546.6812744140625, 193.97720336914062, 156.93260192871094, -491.52587890625, 82.31085968017578, -182.89971923828125, 756.0157470703125, -252.71885681152344, 294.3611755371094, 571.4577026367188, -303.17047119140625, 179.66958618164062, 204.70884704589844, 483.400634765625, 898.5186767578125, 473.32464599609375, 54.7149658203125, 546.2218627929688, 350.01495361328125, 464.7120361328125, 107.48729705810547, 121.85383605957031, -10.870819091796875, 517.5028076171875, 354.99603271484375, 686.43359375, 191.10421752929688, 668.7987060546875, 83.96482849121094, 97.20256805419922, 60.47159957885742, 1.5234394073486328, -144.83590698242188, 678.540771484375, 605.7756958007812, 573.9544677734375, 414.6121520996094, 55.10467529296875, 963.1964111328125, 489.41583251953125, 442.8005676269531, 557.0684204101562, 298.2318115234375, 148.8433837890625, 707.5593872070312, -262.3377685546875, 10.3001708984375, 165.68798828125, 402.5440368652344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000419.npy"}
|
|
{"epoch": 0.6152716593245228, "step": 420, "batch_size": 64, "mean": 198.10260009765625, "std": 415.9445495605469, "min": -776.9608154296875, "p10": -281.2812561035155, "median": 151.37195587158203, "p90": 615.8023315429688, "max": 1353.8707275390625, "pos_frac": 0.671875, "sample": [-142.56634521484375, 229.10623168945312, -32.787330627441406, -18.40420913696289, 578.900390625, -119.2820053100586, -334.3971862792969, -20.0113525390625, 182.6934051513672, 577.4363403320312, 36.86247253417969, -52.84954071044922, -33.33403396606445, 180.8997039794922, -20.4007568359375, 137.103515625, 568.2664794921875, 401.0361328125, 80.65769958496094, 282.0758972167969, 119.6444091796875, 282.2446594238281, -142.82247924804688, 498.6133728027344, -100.10162353515625, 178.27322387695312, 8.062606811523438, 433.2879638671875, 167.182861328125, -38.46929931640625, 549.9373779296875, 512.4237060546875, 517.9429321289062, -398.48736572265625, 121.94380950927734, 178.86428833007812, -345.2996826171875, -4.401268005371094, 749.181640625, 740.42724609375, -52.309593200683594, -776.9608154296875, 681.7282104492188, 461.9122009277344, 619.497802734375, 147.94674682617188, 607.1795654296875, 484.2186279296875, 395.9228515625, 321.37420654296875, 2.9763736724853516, -512.383544921875, 361.89373779296875, 39.90646743774414, 13.393688201904297, 1277.2791748046875, -738.9537353515625, 87.13600158691406, 154.7971649169922, 484.96185302734375, 1321.35986328125, -157.34408569335938, -410.2900695800781, 1353.8707275390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000420.npy"}
|
|
{"epoch": 0.6167400881057269, "step": 421, "batch_size": 64, "mean": 218.4813232421875, "std": 316.6380615234375, "min": -867.9801025390625, "p10": -180.60339508056637, "median": 271.03016662597656, "p90": 597.6869018554688, "max": 885.879150390625, "pos_frac": 0.796875, "sample": [331.4568786621094, 78.11134338378906, 131.54624938964844, -153.62640380859375, 283.193359375, 470.22515869140625, 885.879150390625, -67.95791625976562, 216.4302520751953, 62.07460403442383, 368.51959228515625, 30.48232650756836, 594.633544921875, 680.1818237304688, 498.307373046875, 578.4228515625, 317.264404296875, -206.89222717285156, 120.84679412841797, 154.888916015625, 762.845458984375, 532.704833984375, 281.15924072265625, -274.9870300292969, 278.521484375, 237.0494842529297, 435.268798828125, 291.1630859375, 2.3680801391601562, 246.32058715820312, 598.9954833984375, 429.4632568359375, 652.9467163085938, 285.4478454589844, 373.727783203125, -429.28497314453125, 501.7618408203125, 401.39044189453125, -351.5361633300781, 604.6154174804688, 217.8701171875, -19.703359603881836, 374.387451171875, 805.2901611328125, 334.3617248535156, 255.5504913330078, 161.177490234375, -13.93681526184082, 11.940591812133789, 460.3486328125, 102.94729614257812, 31.59186553955078, 294.78277587890625, -867.9801025390625, -192.1649627685547, 63.37229537963867, 115.45050048828125, -141.5242919921875, -144.7425537109375, 272.83538818359375, 296.385986328125, -325.4464416503906, 269.2249450683594, 386.8555908203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000421.npy"}
|
|
{"epoch": 0.618208516886931, "step": 422, "batch_size": 64, "mean": 251.21853637695312, "std": 372.6368103027344, "min": -741.4208374023438, "p10": -114.95525360107418, "median": 241.5098648071289, "p90": 689.6527099609376, "max": 1382.27490234375, "pos_frac": 0.75, "sample": [268.769287109375, 664.992919921875, 887.8555297851562, 700.22119140625, 344.49200439453125, -237.9217071533203, -741.4208374023438, 721.365234375, -68.0967025756836, 1382.27490234375, -68.99772644042969, 162.50486755371094, 246.7913055419922, 45.04901123046875, 389.01129150390625, 187.83953857421875, 68.37374114990234, -138.10244750976562, -10.49294662475586, 229.91212463378906, 8.008513450622559, 774.7116088867188, 329.7757568359375, 177.836669921875, 445.30438232421875, 206.504150390625, 187.66897583007812, 632.4097900390625, 474.1167907714844, 639.7916870117188, 761.01123046875, 353.81146240234375, 345.2632141113281, 178.3197479248047, 408.5777587890625, 318.738037109375, 324.988525390625, -335.6454162597656, 412.1337585449219, -5.65423583984375, -32.0965461730957, 437.3912658691406, 236.22842407226562, -735.302978515625, -25.43416976928711, 625.1944580078125, 520.4981689453125, -6.06694221496582, 712.48681640625, -39.24143981933594, 426.19720458984375, 191.26490783691406, 102.08502197265625, 126.58766174316406, 634.6071166992188, 515.626220703125, 278.44000244140625, -57.36347961425781, 490.4604187011719, 488.7629699707031, -134.65133666992188, 165.44349670410156, 211.30679321289062, -726.531494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000422.npy"}
|
|
{"epoch": 0.6196769456681351, "step": 423, "batch_size": 64, "mean": 275.04901123046875, "std": 324.23388671875, "min": -502.6209716796875, "p10": -122.69287414550777, "median": 238.06320190429688, "p90": 745.9862915039065, "max": 1158.902587890625, "pos_frac": 0.796875, "sample": [265.5863342285156, 129.78970336914062, 203.46205139160156, 22.61566162109375, -137.18798828125, -310.0942077636719, 770.2950439453125, 233.47463989257812, 617.433349609375, 546.9510498046875, 832.144775390625, -214.89010620117188, 890.174560546875, 242.65176391601562, 544.2166137695312, -21.37259292602539, 821.431396484375, 894.77001953125, 785.475341796875, 689.265869140625, -34.5123291015625, -62.60052490234375, 384.13507080078125, 164.50973510742188, 594.6923828125, 193.35809326171875, 172.7744140625, 59.4326171875, -502.6209716796875, -207.46542358398438, 377.62091064453125, -148.80946350097656, 299.3636169433594, 357.8899230957031, 555.170654296875, 270.25982666015625, 34.59635925292969, 432.96734619140625, -184.39697265625, 114.80328369140625, 266.3457336425781, 635.416015625, 198.30917358398438, 396.1353454589844, 1158.902587890625, 144.36158752441406, 425.5011901855469, -9.866317749023438, 402.82342529296875, 79.62318420410156, 96.91609191894531, 274.304931640625, 549.5123291015625, 446.1009521484375, 160.07244873046875, 227.4498748779297, -50.205810546875, 277.2446594238281, 165.7503662109375, -88.87094116210938, 577.8175659179688, 91.74381256103516, 165.85214233398438, 334.5344543457031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000423.npy"}
|
|
{"epoch": 0.6211453744493393, "step": 424, "batch_size": 64, "mean": 255.7906494140625, "std": 350.1225280761719, "min": -358.9618225097656, "p10": -166.3182830810547, "median": 224.63406372070312, "p90": 642.5917602539063, "max": 1330.3076171875, "pos_frac": 0.796875, "sample": [-333.2054443359375, 350.74151611328125, 24.643234252929688, -67.78887939453125, 90.694091796875, 543.838623046875, 396.1813049316406, 323.4677429199219, 103.1436538696289, -240.89105224609375, 225.814453125, 246.24368286132812, -41.91987609863281, -243.1193084716797, 275.1821594238281, 706.843017578125, 430.2738037109375, 1055.4176025390625, 150.9765167236328, 414.5971984863281, 475.9136657714844, 154.4439239501953, 632.35400390625, 630.9882202148438, 310.3750305175781, -127.45331573486328, 223.45367431640625, 154.76705932617188, -177.84576416015625, -163.62481689453125, 62.19731903076172, 1277.496826171875, -167.47262573242188, 318.74542236328125, 350.7064208984375, -96.21484375, 315.11138916015625, -233.6290740966797, 217.14892578125, 47.162261962890625, -97.05107879638672, 136.84164428710938, -358.9618225097656, 93.1524658203125, 313.98602294921875, 289.3847351074219, 262.1235046386719, 85.42996978759766, 254.89303588867188, 1330.3076171875, 417.74420166015625, 646.9793701171875, 348.0236511230469, 178.59854125976562, 382.22784423828125, 86.63656616210938, 842.8200073242188, 185.91424560546875, 174.57432556152344, 1081.8243408203125, 554.11474609375, 274.7124938964844, 121.77769470214844, 148.7886962890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000424.npy"}
|
|
{"epoch": 0.6226138032305433, "step": 425, "batch_size": 64, "mean": 184.33047485351562, "std": 322.9360656738281, "min": -796.1300048828125, "p10": -171.31147308349608, "median": 196.6532440185547, "p90": 573.5288391113282, "max": 804.9915771484375, "pos_frac": 0.78125, "sample": [-105.05657196044922, -41.00214385986328, 374.9977722167969, -338.55670166015625, -796.1300048828125, 192.6485595703125, 140.5489959716797, 272.68536376953125, 87.80105590820312, 140.1853485107422, 4.614234924316406, 513.1893920898438, -649.0679931640625, 12.305538177490234, 267.6059265136719, 57.33782958984375, 45.555702209472656, 95.0966796875, 618.2643432617188, -5.9330596923828125, 328.8247375488281, 158.11322021484375, -14.01821517944336, 279.12030029296875, 716.9580688476562, 12.704597473144531, 504.96881103515625, 35.98121643066406, -32.82097244262695, 804.9915771484375, 121.56317901611328, 589.3785400390625, 641.2557983398438, -174.3012237548828, 185.3971710205078, 249.21469116210938, 535.810546875, 213.3375701904297, 107.03228759765625, 741.3231201171875, -554.1869506835938, 391.1307373046875, 133.93238830566406, -419.3385925292969, 536.5462036132812, 358.490966796875, 436.75567626953125, 227.23312377929688, 6.613433837890625, 394.3261413574219, 189.2918701171875, 234.5, -180.94654846191406, -164.33538818359375, 208.53622436523438, 308.92779541015625, 475.2249755859375, 732.7373657226562, 499.3802185058594, 273.85052490234375, 477.7335205078125, -140.66830444335938, 278.8327331542969, 200.65792846679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000425.npy"}
|
|
{"epoch": 0.6240822320117474, "step": 426, "batch_size": 64, "mean": 245.7498321533203, "std": 297.3548889160156, "min": -530.449462890625, "p10": -30.846286010742183, "median": 182.24010467529297, "p90": 665.4152099609377, "max": 843.7924194335938, "pos_frac": 0.8125, "sample": [254.24334716796875, 92.95276641845703, 109.1092300415039, 153.09365844726562, 237.7742919921875, 125.54676818847656, 89.65130615234375, 215.5817108154297, 731.5994873046875, 82.54145812988281, 183.5367889404297, 205.00177001953125, 2.9821739196777344, 1.8091983795166016, 637.990966796875, -222.42416381835938, 55.54957580566406, 176.49655151367188, 569.306640625, 180.94342041015625, -8.927772521972656, 785.0390625, -66.5857162475586, 201.28118896484375, 121.47584533691406, 117.17819213867188, -127.89408874511719, 349.8909912109375, 757.7293090820312, 428.8576354980469, 238.11402893066406, 615.630126953125, 69.0816879272461, 396.8086853027344, 162.37684631347656, 810.516357421875, 36.41778564453125, -335.27130126953125, 139.02865600585938, 526.442138671875, -32.177764892578125, 354.51556396484375, 843.7924194335938, 86.59081268310547, 438.813720703125, 49.585166931152344, 331.1209411621094, 572.6619262695312, -27.739501953125, 94.39753723144531, -286.15777587890625, 366.6439208984375, -9.921939849853516, 427.31585693359375, 471.42828369140625, 329.86175537109375, -14.589776992797852, 509.5562744140625, 785.5135498046875, -530.449462890625, 621.4613647460938, 677.16845703125, -0.694854736328125, 568.8160400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000426.npy"}
|
|
{"epoch": 0.6255506607929515, "step": 427, "batch_size": 64, "mean": 185.90200805664062, "std": 281.4072570800781, "min": -421.413818359375, "p10": -148.4451690673828, "median": 169.51895904541016, "p90": 467.663265991211, "max": 1042.5074462890625, "pos_frac": 0.75, "sample": [349.9049377441406, 28.484722137451172, 363.9999694824219, 405.0696105957031, 774.477783203125, 62.94359588623047, -42.562767028808594, -40.63727569580078, 382.1312561035156, 251.16375732421875, -23.019683837890625, 79.10935974121094, 151.38894653320312, 170.268798828125, 168.7691192626953, 300.64434814453125, 450.1302185058594, 774.5703735351562, 29.198524475097656, -224.06585693359375, 46.233909606933594, 55.39654541015625, 333.4993896484375, 372.406005859375, 159.38272094726562, 358.317138671875, 247.75827026367188, 860.6683959960938, 30.208267211914062, 179.39096069335938, -144.91571044921875, 535.902099609375, 375.05377197265625, -149.95779418945312, -112.34236145019531, 397.60626220703125, 302.69171142578125, 51.197757720947266, 508.682861328125, 203.3148193359375, 244.343994140625, 139.6956329345703, 64.94783782958984, 111.3804931640625, -421.413818359375, 315.4088439941406, 386.7760009765625, 394.387451171875, -316.3697509765625, -320.8958435058594, 475.17742919921875, 211.25827026367188, 377.0274353027344, -3.9998245239257812, 1042.5074462890625, -105.68560791015625, 62.83343505859375, 308.4796447753906, 354.1452941894531, 131.4286346435547, -113.40129852294922, -208.3425750732422, -56.708946228027344, -197.71531677246094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000427.npy"}
|
|
{"epoch": 0.6270190895741556, "step": 428, "batch_size": 64, "mean": 253.0968017578125, "std": 374.240234375, "min": -819.1317749023438, "p10": -145.88828659057614, "median": 270.65966796875, "p90": 706.1392028808594, "max": 1109.6221923828125, "pos_frac": 0.75, "sample": [23.003131866455078, -172.41610717773438, 28.817590713500977, 459.9493713378906, 817.6632080078125, 413.6422119140625, 123.48356628417969, 564.7587890625, 389.73760986328125, 1109.6221923828125, -263.4686584472656, 89.39776611328125, 510.9228210449219, 193.93740844726562, 488.51348876953125, -90.99810791015625, -534.595703125, 708.75634765625, 281.751953125, 538.4373779296875, 465.9582214355469, 496.4085693359375, -60.060577392578125, -28.30099868774414, 512.0064697265625, 782.52685546875, 374.1292724609375, 263.2830810546875, -91.81304931640625, 323.1671142578125, -79.2174301147461, 316.10723876953125, 811.278564453125, -58.59874725341797, 278.0362548828125, -10.561689376831055, 618.35595703125, 492.82354736328125, 47.73711013793945, 81.92608642578125, 88.478515625, 154.76004028320312, 20.46404266357422, -111.67423248291016, -819.1317749023438, -42.04917907714844, 23.532089233398438, 107.02315521240234, 932.6008911132812, 181.55471801757812, 676.0609741210938, 494.7478332519531, 647.1513671875, -650.2805786132812, 547.3152465820312, 279.6988525390625, 639.7879638671875, 790.8579711914062, 386.9725341796875, 181.22413635253906, 700.0325317382812, -199.42555236816406, 112.93827819824219, -160.55145263671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000428.npy"}
|
|
{"epoch": 0.6284875183553598, "step": 429, "batch_size": 64, "mean": 198.12725830078125, "std": 362.75445556640625, "min": -637.475341796875, "p10": -246.07462310791016, "median": 212.0558853149414, "p90": 644.0177001953125, "max": 983.1301879882812, "pos_frac": 0.71875, "sample": [32.146202087402344, 134.57077026367188, -425.5489501953125, 330.0134582519531, -231.36114501953125, 681.8502807617188, -86.79686737060547, 82.33464813232422, 182.25140380859375, 365.64007568359375, -535.9957275390625, 181.36050415039062, 46.140037536621094, 128.6626434326172, -249.9003143310547, 391.6319580078125, 125.10433959960938, -243.2250518798828, 820.96240234375, -169.0949249267578, 983.1301879882812, 910.901611328125, -67.598876953125, 340.2665710449219, 196.8607177734375, 23.142963409423828, 328.9468994140625, 10.503459930419922, 646.1622314453125, 597.0194091796875, 291.9491882324219, 453.49261474609375, 396.1309814453125, -637.475341796875, 71.0698471069336, -209.8615264892578, -254.63827514648438, 227.2510528564453, 276.13397216796875, -82.2938003540039, 639.0137939453125, 354.335205078125, 261.5499267578125, -2.134857177734375, 780.4268798828125, 360.6642761230469, 556.8487548828125, -223.25979614257812, 231.8055877685547, -247.29586791992188, 395.1821594238281, 159.30982971191406, 544.88427734375, 317.18359375, -149.2324981689453, -549.6044921875, -36.646949768066406, 266.67706298828125, 93.19693756103516, 581.9908447265625, 385.0739440917969, 960.277587890625, 503.0855712890625, 434.97222900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000429.npy"}
|
|
{"epoch": 0.6299559471365639, "step": 430, "batch_size": 64, "mean": 208.2819061279297, "std": 284.055419921875, "min": -420.93231201171875, "p10": -104.74919128417966, "median": 199.57675170898438, "p90": 611.7931396484375, "max": 999.9906616210938, "pos_frac": 0.75, "sample": [223.51394653320312, 190.72129821777344, -78.8980941772461, -114.10354614257812, 721.350830078125, 384.8516540527344, 70.64425659179688, 254.3102569580078, 220.619873046875, 552.5320434570312, 21.666091918945312, -32.09819793701172, 63.16614532470703, -175.35433959960938, 177.93846130371094, 605.9734497070312, -247.065673828125, 433.96197509765625, 79.00642395019531, 91.84707641601562, 208.4322052001953, 83.8149185180664, 167.6824493408203, 208.79067993164062, -50.988014221191406, 364.3613586425781, 330.8359069824219, 344.8700866699219, -82.92236328125, 188.2580108642578, 130.09645080566406, -43.33293914794922, 999.9906616210938, -16.543350219726562, 109.2795181274414, 716.1568603515625, -4.464729309082031, -420.93231201171875, 23.101655960083008, 229.1543731689453, 461.9931640625, 803.7589111328125, 442.1341857910156, 140.12200927734375, 262.3509521484375, 723.4730224609375, -136.20619201660156, -250.08242797851562, 451.2641906738281, 230.1800079345703, 19.211742401123047, -24.019664764404297, 445.1887512207031, 246.5485076904297, -7.072750091552734, 296.38970947265625, 303.14178466796875, -260.0447998046875, 383.11871337890625, 778.4923095703125, 614.2872924804688, 219.70361328125, 32.671180725097656, 223.21226501464844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000430.npy"}
|
|
{"epoch": 0.631424375917768, "step": 431, "batch_size": 64, "mean": 297.705322265625, "std": 312.3294677734375, "min": -420.41729736328125, "p10": -115.75538711547846, "median": 305.82411193847656, "p90": 649.1554504394531, "max": 978.351318359375, "pos_frac": 0.828125, "sample": [-259.44049072265625, 138.5708770751953, -165.6572723388672, 506.1511535644531, 194.72369384765625, 76.8482437133789, 798.30517578125, 169.37530517578125, 414.5133972167969, 511.7869873046875, 290.4737243652344, 457.5894470214844, 65.09793090820312, -55.1163330078125, -158.94192504882812, 116.06242370605469, -22.981353759765625, -310.7244873046875, 599.3147583007812, -240.3898468017578, 599.583740234375, 723.6409301757812, 971.5162963867188, 978.351318359375, 134.56129455566406, 586.5657958984375, 420.9203796386719, 465.12603759765625, 244.3074188232422, 268.32574462890625, 651.6800537109375, 374.02606201171875, 235.952880859375, 98.02857971191406, 458.49951171875, -420.41729736328125, 914.431884765625, 22.558883666992188, 323.0494689941406, -58.87871551513672, 465.3869323730469, 321.17449951171875, 924.6417846679688, -12.320674896240234, 140.61012268066406, 239.93478393554688, 413.5775146484375, 84.70453643798828, 331.03887939453125, 354.7755126953125, 343.4733581542969, 607.939208984375, 643.2647094726562, 364.73822021484375, 550.7738037109375, 441.1236877441406, 178.36549377441406, 74.56513214111328, -140.131103515625, 225.3455352783203, 528.9330444335938, 142.31842041015625, 188.64126586914062, 522.8736572265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000431.npy"}
|
|
{"epoch": 0.6328928046989721, "step": 432, "batch_size": 64, "mean": 281.7933654785156, "std": 295.2173767089844, "min": -245.5845184326172, "p10": -29.607712936401356, "median": 244.45230865478516, "p90": 692.6251037597658, "max": 1254.131103515625, "pos_frac": 0.84375, "sample": [62.77934265136719, 181.91976928710938, 208.3452606201172, 243.64205932617188, 199.7314910888672, 250.1775360107422, 136.80560302734375, -34.12131881713867, 198.24542236328125, 17.604934692382812, 42.98952102661133, 12.333492279052734, 654.9979248046875, 430.6217956542969, -54.11488723754883, 1254.131103515625, 708.7510375976562, 254.68887329101562, 8.450790405273438, 194.21734619140625, 131.122314453125, -18.852203369140625, 288.3616638183594, 73.29058837890625, 448.78399658203125, 258.6202697753906, 645.630126953125, 245.26255798339844, 435.5328369140625, 17.345703125, 223.88092041015625, -19.075965881347656, -245.5845184326172, 299.873046875, -154.72000122070312, 610.8773803710938, 141.5752410888672, -122.94319152832031, 315.9256591796875, 157.65098571777344, 240.7332763671875, -148.62130737304688, 269.2842712402344, 481.09857177734375, 287.3680725097656, 407.7163391113281, 572.7228393554688, 431.5440673828125, 450.2279968261719, 412.8936767578125, 152.95289611816406, 512.4633178710938, -1.356597900390625, 802.8782348632812, 292.7435607910156, 91.148193359375, 765.05078125, 496.1450500488281, 652.5006713867188, 893.3463134765625, -228.16964721679688, 761.7288818359375, 6.027046203613281, 727.5946044921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000432.npy"}
|
|
{"epoch": 0.6343612334801763, "step": 433, "batch_size": 64, "mean": 185.99977111816406, "std": 332.2810974121094, "min": -870.1160278320312, "p10": -159.42413024902342, "median": 225.5430450439453, "p90": 544.8085510253907, "max": 689.577392578125, "pos_frac": 0.75, "sample": [32.113983154296875, 513.4409790039062, 518.914794921875, 524.4435424804688, 428.8578796386719, -85.16145324707031, 333.19476318359375, -794.4338989257812, 507.8857116699219, 547.9700317382812, -870.1160278320312, 51.95082092285156, 635.2515869140625, 379.7098083496094, 118.5003662109375, -18.685291290283203, 163.89198303222656, 377.8313903808594, 499.6016845703125, -756.1724853515625, -18.203285217285156, 594.0956420898438, 598.875732421875, 244.8837890625, 144.86964416503906, 200.3527374267578, 77.91365051269531, 149.23171997070312, -96.41404724121094, 449.74224853515625, -2.34033203125, 689.577392578125, -285.5760803222656, 431.8011474609375, 268.38568115234375, 414.84503173828125, 537.4317626953125, -283.71539306640625, 294.17987060546875, -13.423271179199219, 659.14697265625, 531.03564453125, 265.5895690917969, -130.65869140625, 177.6552734375, 66.1164779663086, 295.5849304199219, 4.310249328613281, -170.23167419433594, -223.74530029296875, 612.9545288085938, 392.9911193847656, 222.37615966796875, 49.46541213989258, 228.70993041992188, -134.20652770996094, 108.7994155883789, 373.0517272949219, -50.6865234375, 35.513328552246094, 276.2538146972656, 427.7916259765625, 140.7113494873047, 239.95298767089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000433.npy"}
|
|
{"epoch": 0.6358296622613803, "step": 434, "batch_size": 64, "mean": 233.06314086914062, "std": 358.6510009765625, "min": -248.54006958007812, "p10": -114.56968994140624, "median": 157.5232696533203, "p90": 669.1182983398437, "max": 1989.0224609375, "pos_frac": 0.734375, "sample": [489.2241516113281, 170.65756225585938, 1989.0224609375, 116.72401428222656, -117.46726989746094, 94.21713256835938, 188.61541748046875, 425.4303283691406, 26.25907325744629, -56.24638366699219, 257.8086242675781, -35.51560974121094, 2.746175765991211, 667.047607421875, -107.80867004394531, 99.01956176757812, -121.23723602294922, -248.54006958007812, 493.7417907714844, 619.0296020507812, 770.1114501953125, -173.49081420898438, 120.11505126953125, 251.58633422851562, 486.03485107421875, 203.2478485107422, -36.720726013183594, -25.006471633911133, 244.75025939941406, 260.9799499511719, 28.851715087890625, 141.6641845703125, 455.1419372558594, -130.3790740966797, -18.926254272460938, 474.6333923339844, 120.81619262695312, 185.62615966796875, 203.322998046875, 236.68475341796875, -32.93341064453125, 395.3016662597656, 670.0057373046875, 68.55882263183594, 163.10629272460938, 267.5563049316406, 469.6773376464844, 150.98802185058594, 151.94024658203125, 807.2129516601562, 273.9322509765625, -57.826568603515625, 787.802490234375, -74.08570861816406, 131.37306213378906, 61.814971923828125, 813.2565307617188, 230.010009765625, -102.97482299804688, -237.73019409179688, 908.6242065429688, 514.1632690429688, 4.6364898681640625, -200.14126586914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000434.npy"}
|
|
{"epoch": 0.6372980910425844, "step": 435, "batch_size": 64, "mean": 252.46389770507812, "std": 311.1219787597656, "min": -632.4189453125, "p10": -70.20884246826171, "median": 224.2421646118164, "p90": 613.5241271972657, "max": 955.7738037109375, "pos_frac": 0.828125, "sample": [949.3775634765625, 442.95660400390625, -632.4189453125, 372.1544189453125, 588.2313232421875, 209.85635375976562, 342.9266357421875, 509.0615234375, 363.5501708984375, 119.67539978027344, 570.6320190429688, 201.5507354736328, 64.93278503417969, 236.514404296875, -74.34098815917969, 607.0278930664062, 955.7738037109375, 821.0048828125, 247.3958740234375, -119.45475769042969, 15.299530029296875, 6.682281494140625, -20.58770751953125, 92.21086120605469, -113.53643798828125, 128.9557342529297, 236.9504852294922, 154.67855834960938, -129.750732421875, 213.637451171875, 276.7253723144531, 38.76206970214844, 149.3446044921875, 508.8583679199219, 519.3616943359375, 344.6097106933594, 376.174072265625, -449.560302734375, 484.2784729003906, -356.7249755859375, 200.93612670898438, 294.9646911621094, 616.3082275390625, 11.2154541015625, 70.57999420166016, 2.191333770751953, 603.4501953125, 592.3364868164062, -3.3080711364746094, -37.94268798828125, 824.1190185546875, 45.19799041748047, 295.2870178222656, 89.67608642578125, 169.02810668945312, 692.2100219726562, 691.9356689453125, 281.2513122558594, 223.60411071777344, 520.1619262695312, 164.5609588623047, -60.567169189453125, 392.83624267578125, 224.88021850585938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000435.npy"}
|
|
{"epoch": 0.6387665198237885, "step": 436, "batch_size": 64, "mean": 220.94302368164062, "std": 299.0478515625, "min": -617.2203369140625, "p10": -127.47192306518555, "median": 200.33131408691406, "p90": 573.9732116699219, "max": 944.840576171875, "pos_frac": 0.796875, "sample": [-131.9987335205078, 140.27027893066406, -126.55838012695312, 57.722068786621094, 575.3416137695312, 272.1943054199219, 352.9288330078125, 816.1925048828125, 632.5405883789062, -119.9120101928711, 153.15843200683594, 827.2952880859375, -35.597808837890625, -139.5027313232422, 276.6951904296875, 287.81158447265625, -170.5148468017578, 44.859161376953125, 909.87158203125, -301.95721435546875, 450.15423583984375, 474.3766174316406, 259.53302001953125, 406.67706298828125, 570.7802734375, -6.39227294921875, 373.92919921875, 512.349853515625, 19.701766967773438, 73.19783020019531, 944.840576171875, 57.53504180908203, 510.9104309082031, 176.8969268798828, 304.3393249511719, 198.95899963378906, -2.4909210205078125, 18.028968811035156, 268.0826110839844, 130.893798828125, 419.05206298828125, 73.37875366210938, 221.50416564941406, 139.94287109375, 182.3658905029297, 283.6896057128906, 232.2282257080078, -169.2109375, 173.69700622558594, 207.6802978515625, 261.6090087890625, 213.015380859375, 413.8766174316406, 497.98028564453125, 910.0955810546875, 27.284194946289062, 86.44095611572266, 25.065261840820312, -617.2203369140625, 201.70362854003906, -127.86344146728516, 51.06888198852539, 449.63287353515625, -79.80636596679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000436.npy"}
|
|
{"epoch": 0.6402349486049926, "step": 437, "batch_size": 64, "mean": 228.09780883789062, "std": 288.26373291015625, "min": -512.4094848632812, "p10": -86.24424438476562, "median": 220.6804428100586, "p90": 554.0132934570313, "max": 949.8761596679688, "pos_frac": 0.75, "sample": [104.36186218261719, -240.9345703125, 296.4149169921875, 544.01513671875, 386.5162658691406, -43.19672393798828, 604.0639038085938, 176.02102661132812, 318.72369384765625, -25.408294677734375, -512.4094848632812, 269.4919738769531, -296.920654296875, 489.7416076660156, -17.3480224609375, 192.9224853515625, 171.90829467773438, -268.48858642578125, 154.38873291015625, 519.9219970703125, 43.152000427246094, 464.63482666015625, 216.75559997558594, -32.98717498779297, -211.76718139648438, 621.6400146484375, 200.44815063476562, 114.93587493896484, 398.15472412109375, 467.94354248046875, 812.1870727539062, 488.15045166015625, 780.10986328125, -128.8769989013672, 949.8761596679688, -83.75325012207031, -60.38762664794922, 0.7323436737060547, 260.68780517578125, 175.87991333007812, 1.7345809936523438, -15.952728271484375, 276.1766052246094, 314.6235046386719, 426.107666015625, 120.88577270507812, 436.75042724609375, 434.3238220214844, 532.7644653320312, 558.2982177734375, 202.84877014160156, 385.8053283691406, 428.44110107421875, -49.8505859375, -79.7052230834961, 687.47265625, 330.4751892089844, 404.44140625, -87.31181335449219, 273.2451171875, 68.50030517578125, 417.7865295410156, 224.60528564453125, 4.491676330566406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000437.npy"}
|
|
{"epoch": 0.6417033773861968, "step": 438, "batch_size": 64, "mean": 261.7484436035156, "std": 299.0746154785156, "min": -361.3745422363281, "p10": -100.07209167480467, "median": 232.61322021484375, "p90": 666.1348327636721, "max": 986.5458374023438, "pos_frac": 0.78125, "sample": [136.14260864257812, 47.15064239501953, 386.32666015625, -125.77070617675781, 229.3409423828125, 123.701171875, 845.695556640625, -12.993288040161133, 175.39639282226562, -48.6922607421875, 232.38912963867188, 382.9748229980469, -16.97281265258789, 193.2406005859375, 427.37225341796875, 447.64263916015625, 298.268310546875, 986.5458374023438, 23.304454803466797, 345.35455322265625, 182.24423217773438, 614.5640258789062, 373.45782470703125, 610.3643798828125, 161.34613037109375, 232.83731079101562, 26.32758331298828, 343.9331359863281, -361.3745422363281, -60.523712158203125, -106.82620239257812, -3.1175365447998047, 414.5685729980469, 770.4844360351562, 628.7613525390625, 358.20654296875, 278.4711608886719, -353.08050537109375, 557.1932373046875, 282.0198059082031, 764.6885986328125, 161.736572265625, -158.72796630859375, 757.4070434570312, -84.3125, 357.7826232910156, 560.79736328125, 385.2347717285156, 544.8390502929688, 112.21868133544922, 95.76760864257812, 327.8071594238281, 763.425537109375, 78.26588439941406, -53.660308837890625, 146.4598846435547, 151.28799438476562, -313.01531982421875, -132.93109130859375, 584.697265625, 199.6272735595703, 682.1520385742188, 340.07257080078125, 454.0050354003906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000438.npy"}
|
|
{"epoch": 0.6431718061674009, "step": 439, "batch_size": 64, "mean": 206.4326171875, "std": 340.36627197265625, "min": -482.96636962890625, "p10": -119.53141174316403, "median": 160.7615509033203, "p90": 733.8090209960938, "max": 1045.6624755859375, "pos_frac": 0.75, "sample": [234.05654907226562, 738.0316162109375, 110.35421752929688, 337.6349182128906, 105.40264892578125, -84.74911499023438, 755.7940673828125, 12.594833374023438, 165.88880920410156, 40.17607879638672, -347.82464599609375, 791.9661254882812, 88.66529846191406, -61.379539489746094, -77.49971771240234, 322.3207702636719, 302.00823974609375, 596.849365234375, 677.5601806640625, 205.46804809570312, 109.11109924316406, 938.1536865234375, 223.38430786132812, -62.590728759765625, 206.261962890625, -214.1781005859375, 155.63429260253906, -90.07548522949219, -3.7972984313964844, 35.37053680419922, 342.5507507324219, -321.272216796875, 247.68128967285156, 490.1032409667969, 1045.6624755859375, 344.7078552246094, 537.3986206054688, 192.28384399414062, 43.85560607910156, 247.3760986328125, 129.30419921875, -55.216514587402344, -424.8365478515625, -71.56298828125, 0.9792251586914062, 262.0507507324219, 133.3880615234375, 257.35760498046875, -422.53387451171875, 596.390625, 801.2875366210938, 13.66970443725586, 234.8938751220703, 723.956298828125, 111.18590545654297, -132.15538024902344, 480.5947265625, 363.0372619628906, 84.85043334960938, 88.39607238769531, 984.1797485351562, 239.1424560546875, -482.96636962890625, -84.64643859863281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000439.npy"}
|
|
{"epoch": 0.644640234948605, "step": 440, "batch_size": 64, "mean": 245.0926513671875, "std": 342.2526550292969, "min": -229.68072509765625, "p10": -87.17445907592771, "median": 150.9848861694336, "p90": 694.5493408203125, "max": 1496.3935546875, "pos_frac": 0.796875, "sample": [240.79322814941406, 333.6954040527344, 390.7434387207031, 372.8759765625, -147.211181640625, 31.79540252685547, 604.8438110351562, 149.4189453125, 73.44163513183594, 829.7902221679688, 53.74437713623047, 49.338470458984375, 185.562744140625, 124.65093994140625, 698.6417846679688, -206.12924194335938, 147.61978149414062, -215.5004425048828, 27.891693115234375, 6.320762634277344, 170.06228637695312, 645.4834594726562, 65.12680053710938, 56.67786407470703, 371.70587158203125, 232.6699676513672, -103.54183197021484, 248.700927734375, 398.33233642578125, 199.3798370361328, 144.53570556640625, 765.3434448242188, -53.220977783203125, 267.86083984375, 12.087038040161133, -4.309906005859375, 1244.5335693359375, -198.37344360351562, 242.564453125, 22.18305015563965, 832.799072265625, 267.8344421386719, 975.792236328125, 497.486572265625, 216.52279663085938, 54.52564239501953, -97.57235717773438, 129.96971130371094, 455.6232604980469, 494.8544921875, 110.72552490234375, 685.0003051757812, -28.21570587158203, 152.5508270263672, 18.542766571044922, 489.980712890625, 40.75322341918945, -40.53235626220703, -62.912696838378906, 221.74526977539062, 1496.3935546875, 535.761474609375, -229.68072509765625, -12.148193359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000440.npy"}
|
|
{"epoch": 0.6461086637298091, "step": 441, "batch_size": 64, "mean": 243.21163940429688, "std": 430.8845520019531, "min": -753.8775024414062, "p10": -209.32743988037106, "median": 199.3617401123047, "p90": 780.7288330078127, "max": 1950.1273193359375, "pos_frac": 0.734375, "sample": [239.32237243652344, 143.38870239257812, 286.298583984375, 524.1126098632812, 405.855224609375, 798.3037719726562, -58.32476806640625, 27.348114013671875, 143.77804565429688, -380.9221496582031, -187.76365661621094, -78.3254623413086, 744.421630859375, 906.7129516601562, 311.0107421875, -508.7322692871094, 112.24909973144531, 798.3784790039062, 147.87103271484375, 16.961395263671875, 40.76615905761719, 102.72386169433594, 31.565582275390625, 801.110107421875, 796.2890625, 123.13546752929688, 214.6226348876953, 433.53619384765625, 369.58465576171875, 1950.1273193359375, 14.130743026733398, -488.8711853027344, 360.339599609375, 547.454833984375, -450.2642822265625, -112.50558471679688, 135.29013061523438, 253.22531127929688, 612.729248046875, -403.5002746582031, -66.14590454101562, 490.76025390625, -107.05436706542969, -753.8775024414062, 477.44036865234375, 429.611328125, 401.6566162109375, 452.538818359375, 308.9387512207031, -84.32022094726562, 184.10084533691406, -218.56906127929688, -94.719970703125, 464.3233642578125, 335.611572265625, 127.27044677734375, 646.1520385742188, 381.62957763671875, 580.6121215820312, 1115.30322265625, -47.385746002197266, -41.646934509277344, 134.56793212890625, 725.3139038085938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000441.npy"}
|
|
{"epoch": 0.6475770925110133, "step": 442, "batch_size": 64, "mean": 283.212890625, "std": 388.65576171875, "min": -473.48419189453125, "p10": -64.58719902038573, "median": 168.62572479248047, "p90": 858.7568115234377, "max": 1219.9248046875, "pos_frac": 0.765625, "sample": [-68.8858871459961, 185.80667114257812, 547.611572265625, 20.8648681640625, 277.21038818359375, 145.76687622070312, -30.424949645996094, 240.43687438964844, 65.6083984375, -68.93939208984375, -21.055744171142578, 412.4182434082031, 1.7636032104492188, 516.6063842773438, 357.04754638671875, 672.3080444335938, 997.6198120117188, 389.26318359375, -11.810211181640625, 88.09007263183594, 96.36994171142578, 560.4081420898438, 22.9990234375, 256.2899475097656, -242.7748260498047, 220.94036865234375, 785.0003051757812, 306.3907470703125, 874.4857177734375, 161.8604736328125, 77.29840087890625, -473.48419189453125, 112.80569458007812, -54.55692672729492, -5.882087707519531, 581.6426391601562, -48.04738998413086, 475.8664245605469, 43.97820281982422, 1125.102783203125, -260.8731689453125, 392.9988708496094, 822.0560302734375, 73.58118438720703, 44.865966796875, 1219.9248046875, 786.04736328125, 294.2650451660156, 638.18212890625, 170.94253540039062, -8.34381103515625, -369.2793884277344, 58.9014892578125, 42.78321838378906, -26.634719848632812, -230.5489044189453, 312.7720947265625, 439.069091796875, 1172.710693359375, 719.5206909179688, 1141.273681640625, 893.9212036132812, 166.3089141845703, 37.18055725097656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000442.npy"}
|
|
{"epoch": 0.6490455212922174, "step": 443, "batch_size": 64, "mean": 179.22032165527344, "std": 401.8019104003906, "min": -866.1666259765625, "p10": -272.58109283447266, "median": 172.8091583251953, "p90": 640.9895019531252, "max": 1328.835693359375, "pos_frac": 0.734375, "sample": [1175.9376220703125, 86.01154327392578, 878.4112548828125, 19.340072631835938, 321.05047607421875, 113.45500183105469, -80.57990264892578, -105.95234680175781, -379.26885986328125, 136.54603576660156, 109.28013610839844, -6.7306976318359375, 174.03567504882812, 171.5826416015625, 77.27603149414062, -647.6952514648438, 119.00807189941406, -94.52687072753906, -50.056785583496094, -51.0101318359375, 1175.1429443359375, 335.183349609375, 555.4535522460938, 316.1357116699219, -693.7158813476562, -603.8688354492188, 225.45474243164062, 207.65196228027344, 664.6578979492188, -866.1666259765625, 410.27520751953125, 354.01483154296875, 271.9346923828125, 128.7482147216797, 167.4314727783203, -104.55982208251953, 181.04620361328125, 418.106689453125, 440.2757263183594, 200.9684295654297, -250.8867645263672, -45.57139587402344, 415.7846374511719, -290.3294372558594, 329.33251953125, 97.32994079589844, 298.8077087402344, 1328.835693359375, 272.22528076171875, 143.318115234375, 585.7632446289062, 82.66848754882812, 176.48570251464844, 139.25282287597656, -183.8013458251953, 256.1836853027344, -281.878662109375, 276.65887451171875, 114.49085235595703, 242.27639770507812, 840.8306884765625, 776.4405517578125, 190.81179809570312, 204.78662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000443.npy"}
|
|
{"epoch": 0.6505139500734214, "step": 444, "batch_size": 64, "mean": 301.04541015625, "std": 393.59844970703125, "min": -669.7508544921875, "p10": -111.00416564941405, "median": 290.135009765625, "p90": 793.7643066406256, "max": 1449.796630859375, "pos_frac": 0.78125, "sample": [395.95611572265625, -296.73895263671875, 88.94434356689453, 576.90380859375, 43.35521697998047, 361.826171875, -181.4425811767578, 1449.796630859375, 877.2991943359375, -43.02031326293945, -117.64041137695312, 289.4505920410156, 263.31787109375, 572.3695678710938, 631.0460205078125, 221.35491943359375, 855.735107421875, 445.2900390625, -137.18661499023438, 92.97663879394531, -3.2544498443603516, -35.986900329589844, 415.9743347167969, 975.383544921875, 29.233230590820312, 393.74334716796875, 323.6542663574219, -36.42156982421875, 290.8194274902344, -400.3988037109375, -24.99444007873535, 1407.1771240234375, 304.93988037109375, 294.7945861816406, 317.583984375, 460.66058349609375, 1185.095703125, 440.54168701171875, 536.4442138671875, 237.40151977539062, -95.51959228515625, 971.8356323242188, 625.6414794921875, 54.958396911621094, 551.88427734375, 551.8642578125, 575.6810913085938, 5.027872085571289, 182.89356994628906, 151.5952911376953, 292.85302734375, 211.4827117919922, 327.5085144042969, 172.4254150390625, 474.89007568359375, 231.2315673828125, 28.033109664916992, -51.56593322753906, -259.6168518066406, 69.08486938476562, 649.165771484375, -669.7508544921875, 198.40896606445312, 514.9091186523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000444.npy"}
|
|
{"epoch": 0.6519823788546255, "step": 445, "batch_size": 64, "mean": 274.0172119140625, "std": 353.38446044921875, "min": -444.21087646484375, "p10": -128.76868896484373, "median": 211.5077667236328, "p90": 689.347216796875, "max": 1214.563232421875, "pos_frac": 0.78125, "sample": [219.33541870117188, 46.740814208984375, 16.420066833496094, 838.7125244140625, 41.62849426269531, 593.373291015625, 489.47357177734375, 148.95208740234375, 68.572265625, 171.77732849121094, 132.652099609375, 557.8429565429688, -11.002182006835938, 415.9329528808594, 202.77078247070312, -444.21087646484375, 118.60333251953125, 466.4228210449219, 464.642333984375, 186.5458221435547, 413.2547302246094, 356.3747863769531, 514.5828857421875, 690.1990966796875, 76.02287292480469, 144.98236083984375, 1214.563232421875, 462.7839050292969, 494.56793212890625, 622.4317016601562, 767.5201416015625, 619.9415893554688, 167.93626403808594, 687.3594970703125, 28.008041381835938, -192.810302734375, 266.0340881347656, 303.1108093261719, -139.8323516845703, 522.28662109375, 4.737571716308594, 1066.2796630859375, 530.6826171875, -285.41949462890625, 787.089599609375, 366.22332763671875, 165.77734375, -223.247314453125, -69.04064178466797, -66.31608581542969, 294.3300476074219, -74.0174331665039, -102.95347595214844, 427.9530334472656, 185.9471435546875, 248.5425567626953, -32.079376220703125, -242.39852905273438, 1192.1490478515625, -84.40580749511719, 598.095703125, 203.68011474609375, 268.784423828125, -367.7956848144531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000445.npy"}
|
|
{"epoch": 0.6534508076358296, "step": 446, "batch_size": 64, "mean": 297.6376037597656, "std": 309.880126953125, "min": -593.6709594726562, "p10": -35.325327301025375, "median": 273.12139892578125, "p90": 748.6624389648439, "max": 1075.8548583984375, "pos_frac": 0.84375, "sample": [-20.53679656982422, 301.5478515625, 36.05915832519531, 786.8770751953125, 831.6087036132812, 349.5799255371094, 31.543170928955078, -593.6709594726562, 201.25819396972656, 649.4022827148438, 826.9451293945312, 403.0108642578125, 0.7388076782226562, 723.9465942382812, 546.0313720703125, 576.8560180664062, 573.3487548828125, 347.1002197265625, 160.88461303710938, 389.686279296875, -41.66326904296875, 137.2191162109375, 145.21864318847656, 240.3112030029297, 203.7122344970703, -334.84478759765625, 120.91078186035156, 206.34194946289062, 485.54376220703125, 261.2872314453125, 568.239013671875, -48.46452331542969, 57.257476806640625, 194.29208374023438, 18.51661491394043, 227.08401489257812, -13.182647705078125, 515.510986328125, 296.5710144042969, 833.267822265625, 368.74810791015625, 156.35862731933594, 759.2549438476562, 1075.8548583984375, 471.2088317871094, -48.333518981933594, 794.783203125, 457.0231018066406, 304.656005859375, 299.7851257324219, 434.9461975097656, -0.2670440673828125, -96.08270263671875, 173.95132446289062, 88.61101531982422, 163.87103271484375, 192.2509765625, 378.45269775390625, 683.2182006835938, 209.94390869140625, -253.62588500976562, 661.7161254882812, 284.95556640625, 292.1795349121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000446.npy"}
|
|
{"epoch": 0.6549192364170338, "step": 447, "batch_size": 64, "mean": 303.20330810546875, "std": 364.894775390625, "min": -418.2218017578125, "p10": -116.30771484374999, "median": 250.1058349609375, "p90": 710.4942382812502, "max": 1873.76904296875, "pos_frac": 0.84375, "sample": [215.55630493164062, 126.0027847290039, 432.3634033203125, 218.8224334716797, 38.870513916015625, -418.2218017578125, 483.8578186035156, 274.4742736816406, 333.5957946777344, -66.864013671875, 62.30849075317383, 763.2919311523438, 266.693359375, 188.2503204345703, 666.29443359375, 531.5831909179688, 814.9196166992188, 73.22447204589844, 1873.76904296875, 787.8364868164062, 193.64230346679688, 1016.609375, 487.40789794921875, 440.5338134765625, -234.50636291503906, 233.518310546875, 576.67919921875, -151.1158447265625, 300.4760437011719, 187.9807891845703, -142.0040283203125, 21.561264038085938, 2.9873428344726562, -121.47610473632812, 389.99945068359375, -104.24813842773438, 195.48193359375, 572.1256103515625, 94.3609619140625, 32.76923370361328, 558.9758911132812, 73.68590545654297, 130.6330108642578, -309.4825439453125, 729.43701171875, 481.50860595703125, 427.7666320800781, -158.89752197265625, 197.6397705078125, 28.032480239868164, 431.606689453125, 406.1818542480469, 386.0101318359375, 347.9574890136719, 152.8968505859375, 586.2245483398438, 303.2273254394531, 122.86494445800781, 1002.062744140625, 605.2034912109375, 90.01677703857422, 614.5289916992188, 568.5787963867188, -31.060943603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000447.npy"}
|
|
{"epoch": 0.6563876651982379, "step": 448, "batch_size": 64, "mean": 281.41815185546875, "std": 342.5064392089844, "min": -540.8357543945312, "p10": -127.09532318115232, "median": 229.603515625, "p90": 646.697802734375, "max": 1328.23291015625, "pos_frac": 0.828125, "sample": [-1.7606887817382812, 63.27480697631836, -341.4258117675781, 164.02801513671875, 629.4683227539062, 22.44805908203125, 183.11708068847656, -98.55985260009766, 295.35467529296875, -540.8357543945312, 483.3043212890625, 63.71613311767578, 620.0299072265625, 62.59393310546875, 235.83383178710938, 505.113525390625, 16.851491928100586, -37.37644958496094, 634.7620849609375, -141.52822875976562, -136.83412170410156, 111.74652099609375, 748.2483520507812, 444.9189453125, 223.37319946289062, 263.6141052246094, 186.47891235351562, 166.3388671875, 485.1251220703125, 808.4852294921875, 307.2384033203125, 566.910400390625, 42.035911560058594, 413.70965576171875, 1328.23291015625, 12.90211296081543, 80.75788879394531, 195.1587371826172, 114.4371109008789, 312.3333740234375, 620.2391357421875, 600.1906127929688, 241.41368103027344, -155.8321533203125, 1017.2139282226562, 329.16986083984375, 183.488525390625, -104.3714599609375, 448.3132019042969, 82.90249633789062, 321.0017395019531, -234.66477966308594, 1001.872314453125, -152.0938720703125, 590.0833129882812, 870.6939086914062, 45.890899658203125, 383.66314697265625, 123.50617218017578, 306.3545837402344, 574.7169799804688, 651.8131103515625, 550.3069458007812, 221.26910400390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000448.npy"}
|
|
{"epoch": 0.657856093979442, "step": 449, "batch_size": 64, "mean": 294.76324462890625, "std": 428.3898010253906, "min": -799.3037109375, "p10": -165.43976440429685, "median": 231.09127807617188, "p90": 782.8262207031252, "max": 2247.921142578125, "pos_frac": 0.765625, "sample": [191.9852752685547, -91.1227035522461, 325.96185302734375, 13.192794799804688, 755.5758666992188, 728.260498046875, 282.86370849609375, 50.59941864013672, 167.57525634765625, 246.69601440429688, 892.3289794921875, -179.5172882080078, -34.57147979736328, 215.48654174804688, 664.1136474609375, 25.458019256591797, 470.00177001953125, 180.86285400390625, 115.24330139160156, -193.5096435546875, 796.0096435546875, 282.19970703125, 558.9775390625, 680.9542236328125, 291.73211669921875, 683.6380615234375, -223.60794067382812, 34.59910583496094, 418.077880859375, 415.5574035644531, 198.22174072265625, 209.43325805664062, 45.4285888671875, 589.8502197265625, 501.40509033203125, 753.2056274414062, -117.51677703857422, 152.57713317871094, -120.01544189453125, -140.31211853027344, 2247.921142578125, 589.142578125, -208.1127471923828, 327.0726013183594, 388.25616455078125, 165.98345947265625, -62.65191650390625, -799.3037109375, -37.58511734008789, 979.7570190429688, 275.2571716308594, 827.0078125, 794.5049438476562, -210.34742736816406, 462.223876953125, 169.42959594726562, 427.54925537109375, 977.727294921875, 303.78778076171875, -49.60072326660156, -176.20875549316406, 29.734130859375, 167.63417053222656, 437.7698059082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000449.npy"}
|
|
{"epoch": 0.6593245227606461, "step": 450, "batch_size": 64, "mean": 312.71868896484375, "std": 311.4277038574219, "min": -757.5628662109375, "p10": -10.591577911376948, "median": 267.7369384765625, "p90": 718.2314270019532, "max": 1172.798583984375, "pos_frac": 0.875, "sample": [348.1812744140625, 613.1373291015625, 724.4178466796875, 426.3196716308594, 101.546142578125, 686.4505615234375, 1172.798583984375, -68.89591979980469, 340.2535705566406, -5.598197937011719, 648.5928344726562, 498.6483154296875, 703.7964477539062, 254.67691040039062, 406.8782653808594, 779.0587158203125, 91.34552001953125, 116.9725341796875, 229.31732177734375, 94.09051513671875, 375.78277587890625, 609.310546875, 480.0472412109375, 751.798095703125, 193.55516052246094, -41.24209213256836, 99.75382995605469, 878.421875, 368.92584228515625, 645.9657592773438, 108.19718933105469, 370.5986328125, 267.62054443359375, 212.1907501220703, 305.32073974609375, 31.20273208618164, 209.9459686279297, 437.5004577636719, 248.38449096679688, 126.72590637207031, 876.1090698242188, -181.28660583496094, 194.24420166015625, 415.6561279296875, 135.76145935058594, 344.63861083984375, 56.282798767089844, 499.2958679199219, 223.1623992919922, 59.8719596862793, 470.5423583984375, -12.731597900390625, 553.8605346679688, 275.0001220703125, 23.652196884155273, -757.5628662109375, 315.802734375, 267.85333251953125, -18.30370330810547, 179.28407287597656, 134.97402954101562, 983.2805786132812, -16.337486267089844, 148.94903564453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000450.npy"}
|
|
{"epoch": 0.6607929515418502, "step": 451, "batch_size": 64, "mean": 232.50131225585938, "std": 412.1432800292969, "min": -1165.723388671875, "p10": -227.123355102539, "median": 239.6020965576172, "p90": 691.4697021484378, "max": 1166.313232421875, "pos_frac": 0.796875, "sample": [85.69540405273438, -84.85678100585938, 43.86846923828125, 258.86187744140625, 546.5524291992188, 298.5528259277344, 348.4818115234375, 725.33740234375, -40.82862091064453, 276.63555908203125, 519.529052734375, 1166.313232421875, 522.059814453125, 815.417236328125, 129.2720947265625, 220.34231567382812, 130.8875732421875, -1165.723388671875, -593.2817993164062, -250.16326904296875, 118.24947357177734, 84.9197769165039, 491.3979187011719, 535.6549682617188, 540.090087890625, 302.0939636230469, 472.33538818359375, 174.37306213378906, 209.00509643554688, 611.6678466796875, 121.66030883789062, 1095.671630859375, 922.4078369140625, 349.3274841308594, 495.0689392089844, 557.016845703125, 435.85894775390625, -385.73968505859375, 287.3533630371094, 281.7757568359375, -43.664276123046875, -110.48117065429688, 428.6990661621094, -971.3984375, 150.44883728027344, 196.51760864257812, 195.1599884033203, 195.47030639648438, 307.7550964355469, 41.775230407714844, 612.445068359375, -253.6688690185547, -173.36355590820312, 788.3679809570312, 523.0881958007812, 135.36001586914062, 396.1820068359375, -445.4695129394531, 202.96267700195312, 9.3680419921875, 745.105712890625, -171.1631622314453, 314.902587890625, 152.54153442382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000451.npy"}
|
|
{"epoch": 0.6622613803230544, "step": 452, "batch_size": 64, "mean": 277.29498291015625, "std": 276.45880126953125, "min": -332.5181579589844, "p10": -82.34412078857422, "median": 244.67642211914062, "p90": 672.3895935058595, "max": 899.6095581054688, "pos_frac": 0.8125, "sample": [752.71484375, -174.64901733398438, 554.2007446289062, 612.150634765625, -84.0781478881836, 486.38409423828125, 395.9810791015625, 518.211669921875, 505.1620178222656, -78.29805755615234, 542.75244140625, 260.9425964355469, 142.7157745361328, 153.69046020507812, 325.16986083984375, 292.7492370605469, 622.3286743164062, 292.12762451171875, 461.78668212890625, 80.69883728027344, 336.7069396972656, 0.9663772583007812, 405.2802734375, 87.53583526611328, 320.82159423828125, 689.5593872070312, 81.13726806640625, -247.00485229492188, 510.9151306152344, 225.33786010742188, -58.39137268066406, 465.3050537109375, 84.7498779296875, 129.69097900390625, 729.9004516601562, 115.26795959472656, 196.2852325439453, 662.4931640625, 121.88024139404297, 197.8128662109375, 228.41024780273438, -99.42138671875, 206.92303466796875, 690.379638671875, 208.3358154296875, 394.335693359375, 130.78460693359375, 740.26171875, 899.6095581054688, 91.35087585449219, 165.6472930908203, 447.92938232421875, 384.777587890625, -12.787418365478516, -332.5181579589844, 341.7579345703125, -127.35565185546875, 519.9290771484375, 429.09503173828125, 676.6309204101562, -122.53124237060547, 193.59524536132812, -24.182907104492188, -3.0696468353271484], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000452.npy"}
|
|
{"epoch": 0.6637298091042585, "step": 453, "batch_size": 64, "mean": 258.44171142578125, "std": 342.95660400390625, "min": -347.0584411621094, "p10": -127.43044357299803, "median": 188.82168579101562, "p90": 691.2347900390627, "max": 1570.59912109375, "pos_frac": 0.734375, "sample": [568.5540771484375, 536.0327758789062, -115.37065887451172, 120.85491180419922, 230.99639892578125, 166.9409942626953, 261.816650390625, 99.69645690917969, 209.77322387695312, 62.94818115234375, 260.7158508300781, 515.3457641601562, 497.0696105957031, 569.4307861328125, 42.852603912353516, 88.49183654785156, 323.38897705078125, 169.29119873046875, -132.5989227294922, 56.10023498535156, 565.5731201171875, -174.5524444580078, 233.53726196289062, 310.34429931640625, 644.8399658203125, -211.00787353515625, -72.29991149902344, 711.1182861328125, -347.0584411621094, 360.027587890625, -61.47257995605469, 200.3211669921875, 527.5142211914062, 816.6832275390625, 791.8721313476562, -16.037120819091797, -64.71049499511719, 144.45706176757812, 1570.59912109375, 799.6514282226562, 403.6321105957031, -91.9367904663086, 774.9608764648438, -16.931617736816406, 270.29638671875, 103.43414306640625, 571.6650390625, 177.32220458984375, 401.19677734375, 391.748291015625, -249.50379943847656, 504.75042724609375, -166.01165771484375, 542.7260131835938, 60.38330078125, -67.94901275634766, -153.24818420410156, 165.3006591796875, 961.005615234375, 468.8268737792969, -30.658435821533203, -40.35639190673828, 138.39666748046875, 159.4888916015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000453.npy"}
|
|
{"epoch": 0.6651982378854625, "step": 454, "batch_size": 64, "mean": 294.9285888671875, "std": 351.8968811035156, "min": -627.368896484375, "p10": -27.241162109374976, "median": 236.39071655273438, "p90": 657.9208740234376, "max": 1385.812744140625, "pos_frac": 0.875, "sample": [236.78094482421875, 56.9005126953125, 971.6472778320312, 231.1817169189453, 579.0236206054688, 87.99217224121094, 634.00927734375, 306.31634521484375, 22.68698501586914, 173.56565856933594, -555.6647338867188, 582.0557250976562, 317.1202087402344, 331.383544921875, 236.00048828125, 274.8299560546875, 392.9197082519531, 371.4425048828125, 14.528633117675781, -627.368896484375, 30.61149787902832, 585.4260864257812, -51.38128662109375, 780.7939453125, -260.7757873535156, -74.04498291015625, 621.816650390625, 103.47098541259766, 56.95802307128906, 229.5926513671875, 528.538330078125, 140.71641540527344, 347.9437255859375, 116.9946517944336, 462.73095703125, 125.4852523803711, 226.940185546875, 120.7197265625, 301.806884765625, 196.69004821777344, 442.567138671875, 459.9066467285156, 108.38777160644531, 353.8476867675781, -5.5473785400390625, 668.168701171875, 1385.812744140625, 374.8086853027344, 54.58106231689453, 160.53515625, 1341.9161376953125, 1019.638916015625, 287.8486633300781, 521.0732421875, 70.2166976928711, 456.3302917480469, 550.62158203125, -81.84927368164062, 407.4430847167969, -36.53849792480469, 201.42323303222656, 104.73532104492188, 724.5967407226562, 76.52006530761719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000454.npy"}
|
|
{"epoch": 0.6666666666666666, "step": 455, "batch_size": 64, "mean": 339.64556884765625, "std": 382.1095275878906, "min": -548.8935546875, "p10": -69.55535087585447, "median": 236.27769470214844, "p90": 926.1752075195316, "max": 1297.5086669921875, "pos_frac": 0.875, "sample": [115.06135559082031, 165.11785888671875, 729.989501953125, 953.7691650390625, -361.302978515625, 350.7195129394531, 664.9130859375, -79.83805847167969, 1077.556640625, -205.5781707763672, 1049.897705078125, 220.0259552001953, 861.789306640625, -289.3708801269531, 1060.4803466796875, -112.46520233154297, 224.81201171875, 677.4317016601562, 234.44216918945312, 171.66773986816406, 105.97601318359375, 459.2610168457031, 217.63584899902344, 23.242136001586914, 374.4409484863281, 1101.396240234375, 648.436279296875, -136.31640625, 292.12091064453125, 402.20135498046875, 237.97393798828125, 545.4495849609375, 226.96337890625, 180.95639038085938, 205.3789520263672, 209.99957275390625, 272.29132080078125, 329.9908142089844, 92.7034912109375, -548.8935546875, 333.10797119140625, 84.79519653320312, 325.0887756347656, 565.1731567382812, 45.48931884765625, 74.33606719970703, 41.375396728515625, 451.88800048828125, 802.5907592773438, 234.58145141601562, 654.2088623046875, 138.36410522460938, 343.1568603515625, 382.7613830566406, 84.93744659423828, 112.02401733398438, 5.722797393798828, 810.6263427734375, 488.72265625, 409.5385437011719, 1153.175537109375, 193.37655639648438, -45.5623664855957, 1297.5086669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000455.npy"}
|
|
{"epoch": 0.6681350954478708, "step": 456, "batch_size": 64, "mean": 334.0071716308594, "std": 336.36090087890625, "min": -188.68081665039062, "p10": -5.992239379882801, "median": 260.4949188232422, "p90": 838.3354553222657, "max": 1408.7548828125, "pos_frac": 0.890625, "sample": [123.05511474609375, 181.74571228027344, 764.9349975585938, 41.355987548828125, 4.9578857421875, 303.4558410644531, 71.90554809570312, -10.685150146484375, 213.23419189453125, 737.5368041992188, 420.94635009765625, 32.827484130859375, 126.60896301269531, 1408.7548828125, 294.3787841796875, -74.42060089111328, 59.68867492675781, 290.5590515136719, 336.5692138671875, 365.9922790527344, 264.2513427734375, 65.06321716308594, -63.8524169921875, 1000.3232421875, -36.727298736572266, 411.6693115234375, 1247.98193359375, 448.6028747558594, 539.0425415039062, 58.1461181640625, 329.08892822265625, 519.7657470703125, 661.1502685546875, 148.982177734375, 162.3839111328125, 256.7384948730469, 23.595293045043945, 467.09368896484375, 922.2448120117188, 95.79368591308594, 816.7769165039062, 377.0015869140625, 268.82232666015625, 65.06233978271484, -188.68081665039062, 370.68768310546875, 68.75562286376953, -18.145416259765625, 847.5748291015625, 74.10323333740234, 880.6139526367188, 1012.6958618164062, 339.1785583496094, -12.846466064453125, 238.04367065429688, 131.4518280029297, 398.0694885253906, 98.87350463867188, 473.17919921875, 171.00933837890625, 241.13522338867188, 564.4842529296875, 717.264892578125, 226.60794067382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000456.npy"}
|
|
{"epoch": 0.6696035242290749, "step": 457, "batch_size": 64, "mean": 339.88275146484375, "std": 333.91827392578125, "min": -355.375244140625, "p10": -84.51013603210444, "median": 382.4580383300781, "p90": 721.2239624023439, "max": 1191.8912353515625, "pos_frac": 0.828125, "sample": [-10.054618835449219, 446.6233825683594, 438.35809326171875, 359.1328125, 617.8945922851562, 682.8246459960938, 758.3623657226562, 730.2921142578125, 78.48973846435547, 53.586578369140625, 1072.7998046875, 455.7867431640625, 42.356204986572266, 240.64625549316406, 97.83387756347656, 230.7224578857422, 614.531494140625, 265.5733337402344, -0.7750473022460938, 538.4541015625, 700.06494140625, -13.46145248413086, 185.32862854003906, 562.329833984375, 382.17962646484375, 955.4198608398438, -355.375244140625, -172.08457946777344, 658.5136108398438, 582.5377197265625, 115.76329040527344, 535.78369140625, 464.9507751464844, 547.442138671875, 297.0589599609375, -109.99800109863281, 621.2576904296875, 443.6102600097656, 109.87088012695312, 11.9632568359375, 419.4337158203125, 833.8289794921875, 530.418212890625, 144.9351348876953, 35.61637496948242, 113.00904846191406, 617.1265258789062, -102.90807342529297, 539.4296264648438, 431.5575256347656, -242.6298828125, 633.9177856445312, 281.2812805175781, 809.31884765625, -284.27655029296875, 382.7364501953125, 104.83558654785156, -153.72259521484375, 394.66595458984375, -41.58161544799805, 300.47698974609375, 549.0900268554688, 27.449119567871094, 1191.8912353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000457.npy"}
|
|
{"epoch": 0.671071953010279, "step": 458, "batch_size": 64, "mean": 302.67303466796875, "std": 383.2219543457031, "min": -549.4540405273438, "p10": -121.08548202514648, "median": 301.4980773925781, "p90": 687.1515014648439, "max": 1794.8275146484375, "pos_frac": 0.796875, "sample": [-549.4540405273438, 718.1093139648438, 479.3380126953125, 298.08984375, 591.2870483398438, -110.61412048339844, 1794.8275146484375, -83.17326354980469, 306.81219482421875, 545.699951171875, 157.44424438476562, 238.20697021484375, 481.14776611328125, 330.2706604003906, 733.2754516601562, -115.39190673828125, -121.89759826660156, 9.778661727905273, 219.39950561523438, 580.156982421875, 465.8650817871094, 504.8871765136719, 47.698081970214844, 541.4370727539062, 389.65478515625, 375.7314453125, 341.1472473144531, -18.41583251953125, 359.6619873046875, 548.4781494140625, 304.90631103515625, 711.0347900390625, 561.6968994140625, 823.6109619140625, 77.97283935546875, -5.74566650390625, 348.44805908203125, 74.15519714355469, 389.44012451171875, 262.26739501953125, 709.609130859375, 362.5413513183594, -129.58523559570312, -357.9420471191406, 209.74267578125, -262.34588623046875, 171.68267822265625, -119.19054412841797, 185.80221557617188, 1625.16796875, 542.1123657226562, 192.72329711914062, 250.0129852294922, 76.38752746582031, 172.07821655273438, 634.7503662109375, 324.9320983886719, 446.1982116699219, 252.9034881591797, 244.57928466796875, -164.9967803955078, 424.5519714355469, -276.20458984375, 248.3191680908203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000458.npy"}
|
|
{"epoch": 0.6725403817914831, "step": 459, "batch_size": 64, "mean": 331.5897216796875, "std": 413.1249084472656, "min": -760.7987060546875, "p10": -179.01899719238276, "median": 287.12890625, "p90": 892.5488098144532, "max": 1464.4971923828125, "pos_frac": 0.828125, "sample": [71.36039733886719, 29.61529541015625, 277.8040771484375, 212.91546630859375, 286.85125732421875, 413.7723693847656, -232.6925048828125, 92.52268981933594, 586.4515380859375, -272.0907287597656, 565.0306396484375, 389.5584716796875, 473.0850524902344, 534.3076171875, 4.66187858581543, 480.0126953125, -92.30459594726562, 683.212890625, 322.6678466796875, 930.0194091796875, 25.323516845703125, 956.3989868164062, 599.2147216796875, -59.5462532043457, 318.9421081542969, -238.47557067871094, 886.4002685546875, -137.59129333496094, 169.2809600830078, 15.030706405639648, 249.7304229736328, -20.49462127685547, 858.3787841796875, -760.7987060546875, 1464.4971923828125, 287.40655517578125, 1197.3067626953125, 561.5000610351562, 244.8547821044922, -290.167724609375, 150.09619140625, 245.7781982421875, 483.7030029296875, 165.59161376953125, 45.133636474609375, 265.7309875488281, -370.9389953613281, 189.81338500976562, 447.1884460449219, 600.4581298828125, 768.4961547851562, 90.63829040527344, 330.21636962890625, 1192.0313720703125, 301.2215576171875, 102.84786224365234, 912.6755981445312, -196.7737274169922, 751.7171020507812, 757.542236328125, 895.1838989257812, 349.8087158203125, 64.99737548828125, 594.63330078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000459.npy"}
|
|
{"epoch": 0.6740088105726872, "step": 460, "batch_size": 64, "mean": 362.849609375, "std": 397.34759521484375, "min": -326.1785888671875, "p10": -141.39386596679688, "median": 317.8660583496094, "p90": 907.5962341308594, "max": 1532.1075439453125, "pos_frac": 0.828125, "sample": [903.4779663085938, 780.5442504882812, 547.5216064453125, -159.75216674804688, 109.09916687011719, 961.9359741210938, 951.41748046875, 281.95330810546875, 57.978294372558594, 218.5826873779297, -140.70779418945312, 693.8606567382812, 183.27586364746094, 530.96826171875, 189.9540252685547, 246.65301513671875, 251.98483276367188, -326.1785888671875, 161.56849670410156, 1275.425048828125, 1175.5186767578125, 234.8031005859375, 430.7835998535156, 27.33001708984375, 1001.993408203125, 260.9777526855469, 1532.1075439453125, 446.0526123046875, -141.68789672851562, 24.584442138671875, 330.8033752441406, -132.89950561523438, 582.8099975585938, -181.1868438720703, 545.0400390625, 592.2777099609375, -104.6793441772461, 466.8930358886719, 304.9287414550781, -43.21758270263672, 860.173583984375, 621.08349609375, 352.4653015136719, 410.03704833984375, 512.068359375, 168.57562255859375, 86.01097106933594, 165.52574157714844, 361.8500671386719, 253.6863250732422, -255.47618103027344, 115.42440032958984, 141.591552734375, 387.6798400878906, -265.6163330078125, 684.0255126953125, 909.3612060546875, 546.1105346679688, -227.94607543945312, 857.7670288085938, 563.3692626953125, 428.3769836425781, 30.548912048339844, 442.88671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000460.npy"}
|
|
{"epoch": 0.6754772393538914, "step": 461, "batch_size": 64, "mean": 287.71759033203125, "std": 411.5902404785156, "min": -579.1143798828125, "p10": -165.7444396972656, "median": 223.40584564208984, "p90": 782.7369201660157, "max": 1580.951904296875, "pos_frac": 0.734375, "sample": [112.3499755859375, -19.174331665039062, 784.470947265625, 394.4082336425781, -179.90264892578125, 500.5976867675781, -579.1143798828125, 1372.28466796875, 88.055419921875, 602.0429077148438, 129.81558227539062, -102.9870376586914, 307.0028076171875, 624.0115356445312, 1016.4620361328125, 448.16839599609375, 633.7069702148438, -106.35943603515625, 249.1953887939453, 400.42279052734375, 338.7325439453125, 778.6908569335938, 701.3958129882812, 973.9652099609375, -209.99732971191406, -62.691497802734375, 1152.292236328125, 1580.951904296875, 209.22586059570312, 304.663818359375, 477.3047180175781, 189.16566467285156, 224.03131103515625, 44.76544189453125, 156.23480224609375, 309.39776611328125, -232.32427978515625, -17.49072265625, -101.66201782226562, -35.137298583984375, 845.9384155273438, -85.94568634033203, 723.8848266601562, 675.8502197265625, -249.5846710205078, 272.9963073730469, 222.78038024902344, 611.7818603515625, 17.612091064453125, -132.7086181640625, 598.5179443359375, 72.4813461303711, 133.11720275878906, 135.157958984375, 501.2723388671875, -85.53620147705078, -191.90304565429688, 87.62149047851562, 279.0994873046875, 304.67254638671875, 144.29803466796875, -379.13330078125, 277.28515625, 177.3980712890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000461.npy"}
|
|
{"epoch": 0.6769456681350955, "step": 462, "batch_size": 64, "mean": 230.55760192871094, "std": 418.24053955078125, "min": -586.2332763671875, "p10": -279.3302322387695, "median": 180.06812286376953, "p90": 676.3773193359377, "max": 1373.8746337890625, "pos_frac": 0.703125, "sample": [332.05352783203125, 44.852943420410156, 5.313758850097656, -231.45375061035156, 419.9739074707031, 504.9482421875, 29.103792190551758, 502.7032165527344, 565.283447265625, 881.2637939453125, 487.3441162109375, -218.3994140625, -375.383056640625, 30.66982650756836, 162.28988647460938, 495.3380126953125, 554.9339599609375, 152.01480102539062, -225.25921630859375, 703.6810913085938, -299.8487243652344, 154.76901245117188, 391.6253356933594, -586.2332763671875, -160.81297302246094, 56.159820556640625, 612.6685180664062, -454.84320068359375, 23.104969024658203, -32.22785568237305, 1238.98876953125, 420.23822021484375, -319.1748352050781, 258.759033203125, 550.495849609375, 1062.781005859375, -222.2434844970703, -80.4703140258789, 1311.2041015625, 228.64256286621094, 170.32936096191406, -142.23155212402344, -187.2995147705078, 189.806884765625, -323.6409606933594, -42.79920959472656, 374.1672058105469, 544.7291870117188, -321.5359802246094, 151.6905517578125, -56.839569091796875, 447.536376953125, 1373.8746337890625, 542.1305541992188, 393.2474365234375, -141.9288330078125, 274.96258544921875, 552.320068359375, 776.665771484375, 480.19317626953125, 106.59114074707031, 137.554931640625, 239.51393127441406, 241.79251098632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000462.npy"}
|
|
{"epoch": 0.6784140969162996, "step": 463, "batch_size": 64, "mean": 303.08685302734375, "std": 436.7508544921875, "min": -501.42236328125, "p10": -258.9184097290039, "median": 267.85850524902344, "p90": 823.0969055175782, "max": 1440.370849609375, "pos_frac": 0.734375, "sample": [58.11750793457031, 608.9901123046875, -42.64490509033203, 264.753173828125, -208.6466064453125, 389.4869689941406, -165.03546142578125, 16.04741668701172, 456.87713623046875, -32.59196472167969, 598.9309692382812, -265.7428283691406, 580.50830078125, 675.4722900390625, 157.31619262695312, 296.1273498535156, 364.3187561035156, 622.0865478515625, 807.06787109375, -71.53190612792969, 25.088350296020508, 1440.370849609375, 394.3097839355469, 305.5984802246094, 1166.3819580078125, 670.8638916015625, 184.50485229492188, 243.88430786132812, 29.934661865234375, 226.87930297851562, -344.9307556152344, -46.522315979003906, -501.42236328125, 176.4937744140625, 829.9664916992188, -351.6321716308594, -36.10261535644531, -172.5997314453125, 392.4482116699219, -242.99476623535156, 664.685791015625, 314.827392578125, 605.9363403320312, 265.1563720703125, 793.5699462890625, 1040.7554931640625, -42.035491943359375, 270.5606384277344, 1013.9063110351562, -347.0289306640625, 184.5446014404297, 1296.0714111328125, 259.8598327636719, 698.4227294921875, -266.15228271484375, 53.815643310546875, 398.91461181640625, 271.8999938964844, -461.0357666015625, 424.83514404296875, 505.7195129394531, 553.96142578125, 1215.5146484375, 180.4265899658203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000463.npy"}
|
|
{"epoch": 0.6798825256975036, "step": 464, "batch_size": 64, "mean": 478.4560546875, "std": 476.39617919921875, "min": -523.4840087890625, "p10": -37.8988906860351, "median": 450.35365295410156, "p90": 996.2517028808595, "max": 2099.872802734375, "pos_frac": 0.890625, "sample": [-282.86627197265625, 960.3135986328125, 369.6253662109375, 233.85903930664062, 36.62396240234375, 1084.27783203125, 695.7884521484375, 132.2771759033203, 120.26235961914062, 434.99505615234375, 689.4169311523438, 634.260986328125, -523.4840087890625, 782.762939453125, 295.96783447265625, 596.9351196289062, 706.3499755859375, 215.3824920654297, 456.39361572265625, 789.262939453125, 759.0296630859375, 469.6868896484375, 864.8212890625, -191.9900665283203, 1011.6537475585938, 266.2537536621094, 635.5098876953125, 493.596923828125, -450.9042053222656, 167.43682861328125, 889.22705078125, 222.03515625, 547.7277221679688, 57.79104995727539, 444.3136901855469, 596.740966796875, 805.6597900390625, 227.80055236816406, 370.5892333984375, 301.42333984375, 246.52862548828125, 38.474857330322266, 883.2686767578125, 618.2319946289062, 110.76216125488281, 436.0268249511719, -119.95670318603516, 276.57440185546875, 16.53510284423828, 670.4801025390625, 576.8836059570312, 1496.6475830078125, 2099.872802734375, 1173.887939453125, 432.3331298828125, 1084.5439453125, 758.8408813476562, -61.227745056152344, 1712.0157470703125, 736.8572998046875, 520.2791748046875, -438.3421936035156, 344.8526611328125, 90.0081558227539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000464.npy"}
|
|
{"epoch": 0.6813509544787077, "step": 465, "batch_size": 64, "mean": 380.36016845703125, "std": 493.7154235839844, "min": -1397.0531005859375, "p10": -162.67533416748046, "median": 438.0199737548828, "p90": 852.6529113769532, "max": 1853.6395263671875, "pos_frac": 0.859375, "sample": [684.6209716796875, 1380.4942626953125, 513.0576171875, 242.5097198486328, -268.51251220703125, 35.310089111328125, 579.1141967773438, 385.401611328125, 49.847694396972656, 41.180694580078125, 816.1670532226562, 282.302490234375, 382.9154052734375, 82.47087860107422, 635.046142578125, 559.3484497070312, 467.2933349609375, 425.3450012207031, 278.7674560546875, 72.12889862060547, -570.1525268554688, 975.7545776367188, 507.69488525390625, 218.15036010742188, 1853.6395263671875, -109.16957092285156, 453.84869384765625, 146.14849853515625, 368.76263427734375, 524.2957763671875, 710.9073486328125, 506.9530029296875, -169.40725708007812, 179.61984252929688, 249.50155639648438, 542.9988403320312, 18.750160217285156, 245.84375, -335.4336242675781, 658.4844970703125, 955.6554565429688, 225.23355102539062, 549.074462890625, -859.4469604492188, 693.3245849609375, 707.4691162109375, 165.85923767089844, 760.791015625, 111.50636291503906, 839.85693359375, 809.763916015625, -180.12852478027344, 1123.8280029296875, -1397.0531005859375, 911.5091552734375, 858.1369018554688, -146.96751403808594, 367.919921875, 682.7645874023438, 490.6353454589844, 57.78086853027344, 450.6949462890625, 833.8763427734375, 708.9654541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000465.npy"}
|
|
{"epoch": 0.6828193832599119, "step": 466, "batch_size": 64, "mean": 335.2580261230469, "std": 494.2394714355469, "min": -1126.9891357421875, "p10": -328.97020874023434, "median": 286.6752166748047, "p90": 962.3815185546875, "max": 1475.7724609375, "pos_frac": 0.78125, "sample": [596.3165893554688, -348.90447998046875, 200.33636474609375, -399.3563537597656, 825.074951171875, 346.69146728515625, 503.1434020996094, 186.1515350341797, 653.515625, 71.77079772949219, 128.35455322265625, 1113.5257568359375, -393.7833251953125, 861.0147705078125, -26.01504135131836, 152.73046875, 319.68096923828125, 12.643573760986328, 455.3266906738281, 159.4959716796875, 540.1888427734375, 184.76007080078125, 534.24267578125, 377.94903564453125, 849.5247192382812, 160.78717041015625, -4.713768005371094, 803.0225830078125, 279.63726806640625, 550.8772583007812, 374.12628173828125, 172.64535522460938, 845.3543090820312, 1475.7724609375, -1126.9891357421875, 967.0209350585938, 412.0393371582031, 719.1597900390625, 1232.6458740234375, 485.67999267578125, 949.1513061523438, 4.840557098388672, 216.33111572265625, 202.23117065429688, 538.5855712890625, 1240.0753173828125, -203.25033569335938, -435.7266540527344, -66.79349517822266, 90.36700439453125, 1146.24609375, 293.7131652832031, 818.0253295898438, -20.233314514160156, 970.1986083984375, 442.00103759765625, -574.5069580078125, 0.1990509033203125, 951.5562133789062, -282.4569091796875, 270.0356750488281, -132.06988525390625, -409.9389343261719, 196.48782348632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000466.npy"}
|
|
{"epoch": 0.684287812041116, "step": 467, "batch_size": 64, "mean": 354.17047119140625, "std": 672.411865234375, "min": -1327.302001953125, "p10": -471.54986877441405, "median": 397.33689880371094, "p90": 1170.4131591796875, "max": 1939.212890625, "pos_frac": 0.71875, "sample": [854.9246215820312, 1588.4547119140625, 693.6704711914062, 1316.96435546875, -746.4620971679688, -1327.302001953125, 646.797607421875, 456.7754821777344, 499.2306213378906, 356.9459228515625, 320.3083190917969, 431.50244140625, 1767.41259765625, 158.03611755371094, 541.8643188476562, 451.25286865234375, 117.74329376220703, 1149.8155517578125, 1480.9881591796875, 492.58221435546875, 406.110107421875, -66.66822814941406, 71.31854248046875, 809.9696044921875, -354.41741943359375, -437.1165466308594, -692.532470703125, 554.6102294921875, 1172.218505859375, 416.24835205078125, 1061.412841796875, 170.56932067871094, 451.64398193359375, -539.4066772460938, 1043.7950439453125, 7.368865966796875, 1166.20068359375, 227.22991943359375, -74.4345703125, 867.6819458007812, 290.44677734375, -1176.4366455078125, 200.68157958984375, -274.67657470703125, 61.79344177246094, 449.76593017578125, -2.6053466796875, -348.9725341796875, -322.92864990234375, 761.8635864257812, 905.9116821289062, 821.7080078125, 1259.2362060546875, 388.5636901855469, 917.6121826171875, -19.34790802001953, 635.6116333007812, -29.368194580078125, -486.3070068359375, 1939.212890625, -986.1392211914062, -32.85995864868164, 42.88054656982422, 157.95654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000467.npy"}
|
|
{"epoch": 0.6857562408223201, "step": 468, "batch_size": 64, "mean": 444.5817565917969, "std": 578.7382202148438, "min": -1028.61572265625, "p10": -119.88535919189451, "median": 370.33538818359375, "p90": 1413.2689819335942, "max": 2184.939208984375, "pos_frac": 0.875, "sample": [444.3622741699219, 38.51738739013672, 1477.0849609375, 567.6026611328125, 1489.4366455078125, 934.1784057617188, 551.8944702148438, 679.109619140625, 488.0071716308594, 584.6311645507812, 357.6864929199219, 782.7451782226562, 142.88833618164062, 454.8307189941406, 581.1719970703125, 106.5030517578125, 226.2911834716797, -185.27491760253906, 351.8741149902344, 239.09487915039062, 189.52838134765625, 13.862071990966797, 42.68578338623047, 382.9842834472656, 423.5818786621094, 1468.546142578125, 346.2132568359375, 1571.99169921875, 1043.5386962890625, 155.60665893554688, -400.1169128417969, 8.347244262695312, 567.9784545898438, 292.4871826171875, 406.1673278808594, 557.9375610351562, 116.91382598876953, 105.2335205078125, 579.6394653320312, -1028.61572265625, 1815.428466796875, 88.64530944824219, 444.05670166015625, 525.7970581054688, 417.6376953125, 165.73739624023438, -261.7005615234375, 309.5242004394531, 9.524642944335938, 1284.2889404296875, 200.62631225585938, 2087.018310546875, 276.3050231933594, 774.451171875, 3.3681106567382812, 462.4010314941406, 417.5222473144531, 2184.939208984375, 566.793212890625, -126.8304443359375, 320.41748046875, -417.64007568359375, -103.68016052246094, -148.54522705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000468.npy"}
|
|
{"epoch": 0.6872246696035242, "step": 469, "batch_size": 64, "mean": 443.07672119140625, "std": 526.7197265625, "min": -741.7655639648438, "p10": -147.63352279663079, "median": 464.9999542236328, "p90": 1175.1594970703125, "max": 1907.375244140625, "pos_frac": 0.828125, "sample": [1066.3826904296875, 655.668212890625, 103.72628021240234, 548.2422485351562, 1907.375244140625, 557.4761962890625, 1240.700927734375, 45.98998260498047, -34.10508728027344, 156.7445068359375, 646.8450927734375, 129.8148651123047, -678.7996826171875, 33.452423095703125, 1178.2574462890625, -13.014350891113281, 488.9692077636719, 584.0140380859375, 462.0714111328125, 558.06884765625, 1152.6048583984375, 219.57351684570312, 119.06462097167969, 132.45970153808594, 1170.420654296875, 2.27313232421875, 841.7667236328125, 505.9339294433594, 1199.938232421875, -345.854736328125, 720.9924926757812, -179.9459991455078, 791.147705078125, 199.74891662597656, 704.3853149414062, 289.32818603515625, -59.728675842285156, -176.74276733398438, 10.90789794921875, -481.76531982421875, 467.9284973144531, 1005.7301635742188, 1215.1143798828125, 62.54475021362305, -364.16748046875, 197.99969482421875, 251.60330200195312, 1082.2672119140625, 383.71331787109375, 81.78748321533203, 1042.4407958984375, 945.3317260742188, 541.4976806640625, 1177.1904296875, 1258.308349609375, 773.4035034179688, 529.5413208007812, 346.332763671875, 89.89488220214844, -79.71195220947266, 184.34585571289062, 683.0897827148438, 768.09814453125, -741.7655639648438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000469.npy"}
|
|
{"epoch": 0.6886930983847284, "step": 470, "batch_size": 64, "mean": 386.737548828125, "std": 534.6903076171875, "min": -1197.7696533203125, "p10": -291.75819396972656, "median": 453.46177673339844, "p90": 1002.4114990234376, "max": 1697.283203125, "pos_frac": 0.75, "sample": [459.8858642578125, 504.85809326171875, -154.23193359375, 107.16153717041016, 198.63284301757812, -116.35909271240234, 327.715576171875, 111.4994125366211, -57.697174072265625, -673.9273071289062, 579.6582641601562, 736.5143432617188, 466.0272521972656, 447.0376892089844, 631.7828979492188, 1086.31787109375, -289.77374267578125, 718.4432373046875, -1197.7696533203125, 691.6676635742188, -193.79940795898438, 777.1790771484375, 188.34707641601562, -432.7843933105469, 254.2052001953125, 195.96377563476562, -292.6086730957031, 313.8109130859375, 485.14300537109375, 394.6091003417969, 690.693359375, -297.76019287109375, 1394.610107421875, 165.3403778076172, -26.57977867126465, 830.361572265625, 390.93951416015625, 626.5772094726562, 1697.283203125, 691.105712890625, 1266.7156982421875, 96.40890502929688, 797.2806396484375, 977.990966796875, -99.65724182128906, 1439.744384765625, 545.9013671875, 545.5595703125, 688.253662109375, 958.04443359375, 468.86773681640625, -295.5037841796875, -263.9732666015625, 263.8129577636719, 551.0673217773438, 788.9224853515625, -396.68133544921875, 1349.561279296875, 786.454345703125, -110.28889465332031, 1012.87744140625, 98.32809448242188, 328.0477600097656, 523.3875732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000470.npy"}
|
|
{"epoch": 0.6901615271659325, "step": 471, "batch_size": 64, "mean": 321.73797607421875, "std": 542.1455078125, "min": -861.8800048828125, "p10": -278.3611755371094, "median": 204.81277465820312, "p90": 894.6962951660156, "max": 2061.860595703125, "pos_frac": 0.734375, "sample": [1235.3045654296875, -22.56372833251953, -66.4278564453125, 663.6156005859375, 363.836181640625, 727.0870361328125, 706.4476318359375, 2061.860595703125, 922.8099365234375, 264.23626708984375, 53.48484802246094, 325.9404296875, -170.2067413330078, 69.51065063476562, 385.9380187988281, -80.41839599609375, -250.94464111328125, -261.453125, 148.1458740234375, -75.23934936523438, 1611.93408203125, -376.56805419921875, 299.07684326171875, 378.9546203613281, 726.1666870117188, -113.40070343017578, -350.68084716796875, -64.86050415039062, -490.071533203125, 472.1000671386719, -253.99745178222656, 1075.1005859375, 760.0474243164062, 897.9450073242188, 496.3918762207031, 750.9248657226562, 860.4688720703125, 92.00575256347656, 147.9279022216797, 487.64190673828125, 192.71389770507812, 168.70217895507812, 95.63927459716797, 58.26740646362305, 841.5166015625, 1825.28857421875, 45.07745361328125, -861.8800048828125, 814.429443359375, 33.01304626464844, -354.6627197265625, 175.4317626953125, 70.67920684814453, 401.35418701171875, 74.39216613769531, 569.6373901367188, 21.08963966369629, -285.60748291015625, 441.7994384765625, 216.91165161132812, 683.1478271484375, 887.115966796875, -405.7317199707031, 474.83331298828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000471.npy"}
|
|
{"epoch": 0.6916299559471366, "step": 472, "batch_size": 64, "mean": 400.1733703613281, "std": 664.3963012695312, "min": -1389.5599365234375, "p10": -246.91765594482416, "median": 273.3364562988281, "p90": 1334.9525024414065, "max": 2335.417236328125, "pos_frac": 0.796875, "sample": [-1389.5599365234375, -456.90869140625, 861.1632080078125, -491.0302429199219, 198.2109832763672, 190.33242797851562, 625.2001342773438, -164.5762939453125, -53.22918701171875, -799.0219116210938, 315.5245056152344, 63.555240631103516, 142.1158905029297, 580.9274291992188, 58.965545654296875, 935.1412353515625, -39.67158508300781, 671.656494140625, 268.3338623046875, 18.14453125, 1253.24365234375, 46.96717834472656, 59.017333984375, 14.386215209960938, 177.7652587890625, 539.7618408203125, 306.9996032714844, -273.0387878417969, 306.7474365234375, 933.1177978515625, 35.228729248046875, 1027.5809326171875, 201.93246459960938, 981.6805419921875, 2125.079345703125, 1742.76171875, 187.4851531982422, 1468.478515625, 278.33905029296875, 545.8367919921875, 490.70831298828125, 1076.218994140625, 100.48713684082031, 304.149169921875, -204.2616729736328, 1369.9705810546875, 1628.479736328125, 106.01252746582031, 1545.5767822265625, 18.421035766601562, 2335.417236328125, -161.48838806152344, -265.19879150390625, -87.33850860595703, 1177.682861328125, 678.8079833984375, 714.4815063476562, 206.74435424804688, 395.4341125488281, -461.70135498046875, 307.1767883300781, 404.9815979003906, 285.6316223144531, 150.0569305419922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000472.npy"}
|
|
{"epoch": 0.6930983847283406, "step": 473, "batch_size": 64, "mean": 182.16502380371094, "std": 836.7240600585938, "min": -2009.171630859375, "p10": -535.9990478515625, "median": 194.9854507446289, "p90": 991.7993408203125, "max": 3822.724609375, "pos_frac": 0.65625, "sample": [-245.94198608398438, 912.10546875, 242.26316833496094, -341.935546875, -544.1724243164062, 1356.776123046875, -125.14910888671875, 193.99838256835938, 213.82313537597656, 355.9470520019531, 1231.29541015625, 648.6763916015625, -77.00111389160156, 142.00189208984375, -590.7205810546875, -468.2296142578125, -2.619232177734375, -2.7709808349609375, 195.97251892089844, 1264.364013671875, -368.5178527832031, -129.93360900878906, 737.7296142578125, 325.47503662109375, 90.09075927734375, 94.60077667236328, 258.178955078125, 343.942138671875, 189.50433349609375, 85.68496704101562, -2009.171630859375, 49.517982482910156, -535.7789306640625, 237.01734924316406, -1766.3311767578125, -1946.9339599609375, 3822.724609375, 305.05328369140625, 439.62548828125, 1296.0654296875, 286.33282470703125, 279.60626220703125, 760.1732177734375, 815.1094970703125, 187.86448669433594, 640.0145263671875, -536.0933837890625, -334.4399108886719, 210.2746124267578, 981.736328125, 606.05029296875, 996.112060546875, -414.01617431640625, -218.9727783203125, 70.66848754882812, -105.91788482666016, 902.2459716796875, -1525.8443603515625, 503.7132263183594, 158.33078002929688, -374.95855712890625, 312.52789306640625, 219.09494018554688, 1361.72265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000473.npy"}
|
|
{"epoch": 0.6945668135095447, "step": 474, "batch_size": 64, "mean": 327.47601318359375, "std": 611.2088012695312, "min": -1557.2181396484375, "p10": -259.3656311035156, "median": 316.39991760253906, "p90": 1021.6516967773439, "max": 2072.72119140625, "pos_frac": 0.765625, "sample": [-1089.825927734375, 81.24324798583984, 240.8306884765625, 1034.5848388671875, 789.211669921875, -635.8684692382812, 991.474365234375, 123.43534088134766, 280.3124084472656, 541.7780151367188, 1161.04248046875, 2072.72119140625, 29.425085067749023, 241.91519165039062, 100.79109954833984, 865.113525390625, 264.8580627441406, -9.443962097167969, -430.65093994140625, 368.15234375, 37.36701965332031, 395.2774353027344, -195.30987548828125, 163.60494995117188, 380.57159423828125, 763.4398193359375, 812.86376953125, 174.99383544921875, 575.9154663085938, 353.97894287109375, 697.93896484375, -101.0177001953125, 13.796257019042969, 338.186279296875, 567.6626586914062, 288.789794921875, 655.296142578125, 114.1468505859375, -235.79283142089844, -267.8988037109375, 729.99072265625, -99.83587646484375, 596.4761962890625, 531.0382690429688, 176.3914794921875, 294.6135559082031, 1402.4859619140625, 1083.044921875, -106.37400817871094, -700.2863159179688, -239.45489501953125, 919.4673461914062, 423.8819274902344, 646.2998657226562, 569.6793212890625, 220.41639709472656, 383.34326171875, -856.8641967773438, 734.67431640625, -147.84732055664062, 364.9609375, 1300.825927734375, -1557.2181396484375, 1733.843505859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000474.npy"}
|
|
{"epoch": 0.6960352422907489, "step": 475, "batch_size": 64, "mean": 434.46240234375, "std": 461.8392639160156, "min": -388.8052062988281, "p10": -185.18487396240232, "median": 375.6757354736328, "p90": 1028.2608886718756, "max": 1516.798583984375, "pos_frac": 0.796875, "sample": [108.1932373046875, -70.06721496582031, 341.8962707519531, 178.19436645507812, -312.46405029296875, 641.931396484375, 1382.176025390625, 373.15771484375, 1201.184814453125, 1091.9312744140625, 301.8227844238281, 307.3518371582031, 417.922119140625, -1.274688720703125, 633.1920166015625, -388.8052062988281, 682.9686279296875, 58.51618957519531, 808.0448608398438, -193.88003540039062, 782.1061401367188, 872.4382934570312, 772.008544921875, 246.28887939453125, 51.83889389038086, 319.90771484375, 377.01690673828125, 406.32366943359375, 55.6474723815918, 513.855712890625, 280.43463134765625, 528.1634521484375, -96.55079650878906, 276.2572937011719, 818.0231323242188, 865.6014404296875, 451.3049621582031, 752.1555786132812, -206.23878479003906, 1349.6048583984375, 526.1148071289062, -19.55207061767578, 284.7730407714844, -252.8257293701172, 855.6729736328125, 86.42308044433594, 550.6158447265625, -164.8961639404297, 337.88031005859375, 849.131591796875, -148.92465209960938, 847.261474609375, 526.2855224609375, 100.20480346679688, 1495.640625, -219.7935333251953, 320.14849853515625, 1516.798583984375, 708.08740234375, 1190.290771484375, 374.3345642089844, 879.6966552734375, -226.53335571289062, 410.5787353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000475.npy"}
|
|
{"epoch": 0.697503671071953, "step": 476, "batch_size": 64, "mean": 336.776611328125, "std": 480.3819580078125, "min": -563.6574096679688, "p10": -178.10211944580078, "median": 261.4314270019531, "p90": 908.9715576171877, "max": 2213.591064453125, "pos_frac": 0.796875, "sample": [838.9903564453125, 199.1207733154297, -128.90740966796875, 726.640869140625, 261.89239501953125, 509.6947937011719, 1425.2269287109375, 190.70944213867188, 145.28372192382812, -130.98358154296875, 1024.964111328125, 43.88593292236328, 124.62760925292969, 407.2076416015625, 767.36572265625, 651.994384765625, 705.02587890625, 152.79135131835938, 255.38137817382812, 139.37033081054688, 551.1633911132812, 243.69677734375, -36.93733215332031, 1248.319580078125, 550.974853515625, 576.00927734375, -513.9200439453125, -330.15301513671875, 636.9468994140625, 83.2458724975586, 9.05645751953125, 275.2379455566406, -322.3226013183594, -258.8496398925781, 520.1727294921875, -175.06370544433594, -179.404296875, -528.8225708007812, 708.656494140625, 203.45513916015625, 132.22955322265625, 604.9512939453125, 287.8789978027344, -126.56221008300781, 124.83934783935547, 260.970458984375, 693.8964233398438, 1040.32421875, 2213.591064453125, 938.9635009765625, 348.73187255859375, -59.09593200683594, 353.7924499511719, 250.349609375, 107.57017517089844, 106.71975708007812, 501.3670349121094, 471.962890625, 101.78529357910156, 366.6363525390625, 325.739990234375, 1131.4024658203125, 367.5712890625, -563.6574096679688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000476.npy"}
|
|
{"epoch": 0.6989720998531571, "step": 477, "batch_size": 64, "mean": 349.0743408203125, "std": 614.7630615234375, "min": -1731.089599609375, "p10": -272.4799835205078, "median": 272.94847106933594, "p90": 996.4054809570313, "max": 2323.5859375, "pos_frac": 0.734375, "sample": [-562.0038452148438, 474.8106689453125, 957.8908081054688, -670.5684204101562, 1595.2630615234375, 249.27415466308594, 248.48641967773438, 939.0074462890625, 841.6089477539062, 1003.68798828125, 70.5741958618164, 7.469457626342773, 256.81982421875, 935.279296875, 1006.4005126953125, -135.24354553222656, 678.4672241210938, 523.2718505859375, 1229.2645263671875, -280.43988037109375, 594.7213745117188, 170.34490966796875, -9.924118041992188, 173.14495849609375, -253.90689086914062, 880.169189453125, 359.4475402832031, 145.56527709960938, -61.139312744140625, -67.80474853515625, -654.4085083007812, 564.1602783203125, -1731.089599609375, -694.73486328125, 1419.08642578125, 135.63961791992188, 275.5606384277344, 216.66162109375, 541.730712890625, 104.02658081054688, 565.05810546875, -2.0883750915527344, 621.108642578125, 979.4129638671875, 383.1446228027344, 412.38507080078125, 281.3359375, 869.0591430664062, 425.4598083496094, 240.63897705078125, 1194.334228515625, -251.53573608398438, -63.246055603027344, 270.3363037109375, -4.8077545166015625, 880.8868408203125, 676.16796875, -143.66818237304688, 35.931488037109375, -386.72088623046875, 171.33248901367188, 695.0338745117188, 2323.5859375, 691.041259765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000477.npy"}
|
|
{"epoch": 0.7004405286343612, "step": 478, "batch_size": 64, "mean": 402.06536865234375, "std": 531.4916381835938, "min": -909.4766845703125, "p10": -237.871109008789, "median": 391.9664001464844, "p90": 1230.1108886718753, "max": 1822.8262939453125, "pos_frac": 0.8125, "sample": [-267.90960693359375, 129.3504180908203, 823.2716064453125, 221.9998779296875, 44.917991638183594, 473.133544921875, 309.3858947753906, -418.15301513671875, 543.6575317382812, 7.9093017578125, 393.804443359375, 549.8588256835938, -527.5925903320312, -494.5533447265625, 309.0885314941406, 838.0570678710938, 847.52490234375, 1822.8262939453125, 547.571533203125, 131.93099975585938, -373.799072265625, -167.78128051757812, -422.07135009765625, 159.82943725585938, 488.33367919921875, 1022.4261474609375, 465.218994140625, 547.9259033203125, -56.251251220703125, 184.20106506347656, 1472.2869873046875, 1137.5367431640625, 390.12835693359375, 262.3650207519531, 407.0419921875, 170.18905639648438, 1336.3121337890625, 1408.2191162109375, 694.006591796875, -160.23179626464844, 627.5472412109375, -67.04534149169922, 121.36137390136719, -78.22270965576172, 742.8739624023438, 281.8611145019531, 168.17259216308594, 1269.7855224609375, 207.85752868652344, 423.053466796875, 567.9523315429688, 79.67842864990234, 362.521484375, 857.635009765625, 470.007080078125, 291.856689453125, 1384.6151123046875, 656.62451171875, 1324.5460205078125, -909.4766845703125, 629.5707397460938, 443.8079833984375, 616.2488403320312, 7.3830718994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000478.npy"}
|
|
{"epoch": 0.7019089574155654, "step": 479, "batch_size": 64, "mean": 385.1879577636719, "std": 683.02978515625, "min": -1585.702880859375, "p10": -272.3506195068359, "median": 302.2447204589844, "p90": 1179.8433837890625, "max": 3148.469970703125, "pos_frac": 0.71875, "sample": [41.898536682128906, 60.735252380371094, 1181.1251220703125, 640.8128662109375, 1240.0050048828125, 1215.873291015625, -501.89361572265625, -252.67160034179688, 294.13543701171875, 35.3193359375, 847.7824096679688, -213.31143188476562, -201.2686767578125, 311.73095703125, 851.4384765625, 240.64581298828125, 201.3901824951172, -104.31378173828125, 1114.6434326171875, 454.8898620605469, 252.56265258789062, 482.007080078125, -112.60231018066406, 535.8450317382812, 3148.469970703125, 1292.6087646484375, 686.0008544921875, 737.5625, 718.0064697265625, 553.2302856445312, 1176.8526611328125, 253.53952026367188, 176.12664794921875, 830.7113037109375, -701.536865234375, 1902.22802734375, -25.042816162109375, 706.6262817382812, 454.3531188964844, 138.49497985839844, 421.4971008300781, 1139.5123291015625, 772.0289306640625, 462.07098388671875, -157.00120544433594, -1585.702880859375, 459.56463623046875, -396.5084228515625, -280.78448486328125, -111.49530029296875, 138.44468688964844, -141.17105102539062, -505.78375244140625, -568.6898193359375, 198.62794494628906, -81.7075424194336, 544.0030517578125, -37.859375, 53.0560302734375, 1304.0198974609375, 310.35400390625, 1081.08642578125, 145.29429626464844, 824.161865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000479.npy"}
|
|
{"epoch": 0.7033773861967695, "step": 480, "batch_size": 64, "mean": 302.38421630859375, "std": 581.4031372070312, "min": -854.6018676757812, "p10": -443.15312194824213, "median": 337.36314392089844, "p90": 982.6928649902344, "max": 2243.948974609375, "pos_frac": 0.6875, "sample": [-468.7287902832031, 402.8211669921875, -155.79354858398438, 128.32733154296875, 570.7271728515625, 26.996604919433594, 261.8414611816406, 982.07177734375, 634.1441650390625, 862.9854125976562, 612.7802734375, 1305.000732421875, 555.8751220703125, -371.07757568359375, -601.9192504882812, -530.3399658203125, 802.6204223632812, 822.1863403320312, -148.73764038085938, 367.995849609375, 30.267967224121094, -203.25950622558594, 22.725204467773438, 456.2933349609375, 358.8371276855469, 190.84642028808594, 2243.948974609375, 332.8582763671875, 589.67724609375, -253.05433654785156, 813.447509765625, 982.9590454101562, 949.4605712890625, -240.38414001464844, -528.7647705078125, 341.8680114746094, 449.0115966796875, -49.42308044433594, 508.66632080078125, -854.6018676757812, -836.8958740234375, -644.7738037109375, 616.1947631835938, 1124.9776611328125, 1440.940673828125, -143.19973754882812, 364.1752014160156, 433.15692138671875, -47.18081283569336, 469.5676574707031, -8.982406616210938, -383.4765625, 167.242431640625, 216.00820922851562, 122.6883544921875, 7.949268341064453, 155.5164794921875, -16.57046890258789, 586.8745727539062, 1086.596923828125, 528.148193359375, -194.5505828857422, 1186.25244140625, 920.7728881835938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000480.npy"}
|
|
{"epoch": 0.7048458149779736, "step": 481, "batch_size": 64, "mean": 464.44964599609375, "std": 578.2244873046875, "min": -933.6080322265625, "p10": -184.3487594604492, "median": 399.38323974609375, "p90": 1047.98486328125, "max": 2273.047607421875, "pos_frac": 0.765625, "sample": [2273.047607421875, -933.6080322265625, 585.9940795898438, 397.01593017578125, -226.4154815673828, -102.10087585449219, -106.33584594726562, 357.1153869628906, 485.59405517578125, 1587.35693359375, 91.31743621826172, -224.47476196289062, 195.1874542236328, -238.1116943359375, 522.7977905273438, 868.231201171875, -180.61624145507812, 697.80908203125, 1437.0994873046875, -2.9187164306640625, 37.32598876953125, 810.5626831054688, 331.7951965332031, 795.5780639648438, 460.4345703125, 199.35430908203125, 213.66854858398438, 1027.6376953125, 869.12060546875, -402.5368957519531, 111.111328125, 787.1478881835938, 401.75054931640625, -259.750244140625, 793.9627685546875, 892.0930786132812, -150.89215087890625, -44.73537063598633, 1056.705078125, 504.75579833984375, -148.24151611328125, 146.76927185058594, 953.355224609375, 895.9677124023438, 85.8248062133789, 131.6470947265625, 1790.5257568359375, 339.2485046386719, 1605.570068359375, 1024.752197265625, 236.13922119140625, -7.9669342041015625, 406.0653991699219, 460.4981384277344, 572.8683471679688, 270.815673828125, 218.51992797851562, 863.6683349609375, 808.5281372070312, 738.086181640625, 1546.6522216796875, -185.9484100341797, 365.75213623046875, 686.6028442382812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000481.npy"}
|
|
{"epoch": 0.7063142437591777, "step": 482, "batch_size": 64, "mean": 205.19125366210938, "std": 626.0250854492188, "min": -2106.2060546875, "p10": -507.3819427490234, "median": 249.48712158203125, "p90": 869.1270568847658, "max": 1915.220947265625, "pos_frac": 0.703125, "sample": [161.83877563476562, -55.86778259277344, 333.6095275878906, -815.3740234375, 445.38763427734375, 518.8282470703125, -1060.6470947265625, 589.6497192382812, 593.2986450195312, 428.3805847167969, 394.3355712890625, 680.6575317382812, -10.392318725585938, 353.9107666015625, 679.5034790039062, 1179.057373046875, 234.8714599609375, -369.63531494140625, 379.12139892578125, 372.72589111328125, 10.349498748779297, 347.2068176269531, 386.6942138671875, 898.0764770507812, -1050.2305908203125, 355.3441162109375, 81.00237274169922, 891.9121704101562, 264.102783203125, 513.7989501953125, 661.8587036132812, 654.569580078125, 760.0235595703125, 234.74520874023438, -316.5642395019531, 178.04257202148438, 1098.681396484375, -466.5219421386719, 1299.004150390625, -524.8933715820312, 619.2120361328125, -50.82946014404297, -213.73184204101562, 79.4889907836914, 200.4663848876953, 79.52835083007812, -2106.2060546875, 335.50421142578125, 1915.220947265625, -4.503822326660156, 22.00079345703125, -104.72279357910156, 4.1376953125, 465.8355712890625, -1029.900634765625, 320.75970458984375, 142.18853759765625, 1325.2000732421875, -162.9581298828125, 87.76853942871094, -214.8148193359375, -127.42398834228516, 815.9617919921875, -576.4041137695312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000482.npy"}
|
|
{"epoch": 0.7077826725403817, "step": 483, "batch_size": 64, "mean": 475.9374084472656, "std": 584.6889038085938, "min": -499.2483215332031, "p10": -233.14519500732422, "median": 391.9900207519531, "p90": 1176.90869140625, "max": 3026.2529296875, "pos_frac": 0.8125, "sample": [-149.2707061767578, 119.63076782226562, 1317.5067138671875, 786.5553588867188, -306.72479248046875, 767.2975463867188, 413.3074951171875, 312.2796325683594, 753.909912109375, 630.6764526367188, 389.35540771484375, 796.4913940429688, 502.517333984375, 248.38038635253906, 275.1817932128906, 335.6748962402344, 983.5366821289062, 957.2939453125, -408.0692138671875, 538.6163330078125, 323.70904541015625, -62.83934783935547, 763.5557861328125, 822.921875, 50.76530456542969, -234.75430297851562, 1327.9736328125, 729.5625, 25.62310791015625, 1024.3876953125, -350.7261657714844, 1351.023681640625, 67.2546615600586, 924.5882568359375, 45.54039001464844, 1185.0513916015625, 272.7676696777344, 855.3275756835938, 195.76937866210938, 1099.052978515625, 782.4778442382812, 685.8578491210938, 478.7056579589844, -355.8399658203125, 112.95587921142578, 394.6246337890625, 469.3875427246094, 284.1922607421875, 1157.9090576171875, -20.004898071289062, 156.2374267578125, -499.2483215332031, 170.60891723632812, 3026.2529296875, -229.39060974121094, 184.85240173339844, 1340.4815673828125, 287.32073974609375, 484.2313232421875, -283.63885498046875, 762.20458984375, 1308.6988525390625, 276.65728759765625, -196.24224853515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000483.npy"}
|
|
{"epoch": 0.7092511013215859, "step": 484, "batch_size": 64, "mean": 421.85662841796875, "std": 512.5430297851562, "min": -693.7930908203125, "p10": -117.09284744262695, "median": 366.9254608154297, "p90": 1092.231286621094, "max": 1766.6778564453125, "pos_frac": 0.78125, "sample": [423.31024169921875, -131.73765563964844, 1732.32080078125, 140.1591339111328, -111.96241760253906, -265.882568359375, 599.2843627929688, 122.23021697998047, 1766.6778564453125, 194.66192626953125, 94.77285766601562, 489.0340270996094, 544.5758666992188, 557.8406982421875, 985.3890380859375, 561.9710083007812, 354.2278137207031, 64.62152099609375, -199.62777709960938, -693.7930908203125, 398.2295837402344, -42.1187858581543, 992.5439453125, 965.7744140625, 1593.927734375, 41.6427001953125, 389.15057373046875, 752.6026611328125, 11.119075775146484, -74.62521362304688, -31.93465805053711, -76.78314208984375, 831.7276611328125, 1189.0462646484375, 37.092262268066406, 220.43777465820312, 229.3297119140625, 106.54073333740234, 186.38548278808594, -149.12217712402344, -20.902305603027344, 314.2519836425781, 752.621337890625, 379.62310791015625, 1254.97802734375, 23.222900390625, 696.9157104492188, 1038.2059326171875, 422.1078796386719, -210.62332153320312, -50.46717834472656, 746.9757080078125, 1115.385009765625, 830.3090209960938, 61.92808532714844, 1150.0966796875, 926.973876953125, 432.88531494140625, 442.92266845703125, 127.9742202758789, 839.5999755859375, 56.99281311035156, -119.2916030883789, 987.0978393554688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000484.npy"}
|
|
{"epoch": 0.71071953010279, "step": 485, "batch_size": 64, "mean": 398.8216247558594, "std": 565.0696411132812, "min": -932.9967651367188, "p10": -112.24151153564449, "median": 278.9529266357422, "p90": 1065.2291503906251, "max": 2133.734375, "pos_frac": 0.78125, "sample": [667.080078125, 697.73779296875, 10.739456176757812, 260.9463806152344, 398.5987854003906, 113.09574127197266, 1157.6044921875, 176.11386108398438, 434.18487548828125, 463.4560241699219, 186.89398193359375, -30.60779571533203, 1404.8846435546875, 1081.5909423828125, 188.20074462890625, 978.3767700195312, -631.1881713867188, -40.56043243408203, 994.8173828125, 965.60205078125, 961.4091796875, 1199.8822021484375, 81.66204833984375, 471.9967041015625, 2133.734375, 1094.9969482421875, -36.95642852783203, 33.79814910888672, 770.8831176757812, -77.2104721069336, -54.603736877441406, -48.481170654296875, -521.305419921875, 160.3665771484375, 264.9581298828125, 1019.0418090820312, 525.42919921875, 120.64850616455078, 141.09124755859375, 707.5782470703125, -53.12579345703125, 139.44900512695312, 1921.718017578125, 467.4591064453125, -127.25481414794922, 349.1877746582031, 391.4768371582031, 632.807861328125, 292.9477233886719, -594.65185546875, 1027.0516357421875, 1014.474853515625, 57.104068756103516, -268.5751037597656, 50.10676574707031, -186.57777404785156, 856.6331787109375, 226.2178192138672, 352.4122619628906, 198.54568481445312, 20.68738555908203, -932.9967651367188, 881.9030151367188, 381.0964050292969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000485.npy"}
|
|
{"epoch": 0.7121879588839941, "step": 486, "batch_size": 64, "mean": 457.0060119628906, "std": 689.4300537109375, "min": -1209.8760986328125, "p10": -435.8781555175781, "median": 384.7865753173828, "p90": 1305.7870239257813, "max": 3059.820068359375, "pos_frac": 0.796875, "sample": [904.0462646484375, 552.7471313476562, 107.09950256347656, 532.3720092773438, 325.58697509765625, -754.0299072265625, 178.45809936523438, 59.048133850097656, 919.4938354492188, -153.77255249023438, 462.6169128417969, 1432.57275390625, 637.5949096679688, -784.3512573242188, -452.6630859375, 1530.6358642578125, 859.9552001953125, 906.5379638671875, -238.6913299560547, 983.166748046875, 1319.6158447265625, -396.71331787109375, 1640.393310546875, 1273.519775390625, 267.6634521484375, -111.15596771240234, 556.6444702148438, 327.4966125488281, 85.60177612304688, 1484.6334228515625, 1223.736083984375, 396.066650390625, -514.1049194335938, -757.7700805664062, 344.105224609375, 339.6468505859375, 135.52496337890625, 1319.9884033203125, 842.6619262695312, 813.6002197265625, 358.4942626953125, 669.238525390625, 498.62725830078125, -62.1673583984375, 563.5013427734375, 903.83349609375, 870.4549560546875, 596.4776000976562, 888.4850463867188, 1225.986083984375, 113.7955551147461, 347.9296875, 101.99124145507812, 373.5065002441406, 600.0582885742188, 140.7569122314453, 28.64533042907715, 584.4877319335938, -1209.8760986328125, 145.63014221191406, 369.3855895996094, -46.78948974609375, -473.465576171875, 3059.820068359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000486.npy"}
|
|
{"epoch": 0.7136563876651982, "step": 487, "batch_size": 64, "mean": 339.7569580078125, "std": 534.8098754882812, "min": -1681.393310546875, "p10": -236.00971679687493, "median": 272.87123107910156, "p90": 1042.0217407226564, "max": 1723.0501708984375, "pos_frac": 0.78125, "sample": [251.89833068847656, 1158.4779052734375, 202.86228942871094, -263.1175842285156, 1016.37646484375, 1113.1488037109375, 694.7139892578125, 459.2277526855469, 1632.57421875, -12.983757019042969, 237.64495849609375, -500.5565185546875, 1096.6968994140625, -172.75802612304688, 163.3328857421875, 1723.0501708984375, 276.88067626953125, -320.784423828125, 761.1009521484375, 773.9622192382812, 457.72088623046875, 603.8684692382812, -1681.393310546875, 166.44036865234375, -48.517112731933594, 652.1586303710938, 45.318626403808594, 904.7689208984375, 1264.5419921875, 21.89589500427246, 179.9427947998047, 300.5406494140625, 339.1565246582031, 556.9960327148438, 196.51513671875, -563.243896484375, 232.69552612304688, -507.8086853027344, 549.5036010742188, 268.8617858886719, 396.062255859375, 36.89862060546875, 602.5169677734375, -158.8836212158203, 561.3717651367188, 190.51641845703125, 126.24919128417969, 292.0010681152344, 424.296142578125, 245.664306640625, 100.65470886230469, 531.1882934570312, 260.9610595703125, 457.53631591796875, -24.014389038085938, -270.105224609375, 193.36691284179688, 1053.0125732421875, 710.9877319335938, 699.56103515625, -2.837066650390625, 578.828369140625, -133.07217407226562, 639.9716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000487.npy"}
|
|
{"epoch": 0.7151248164464024, "step": 488, "batch_size": 64, "mean": 356.37518310546875, "std": 449.441650390625, "min": -518.4766845703125, "p10": -110.15949325561522, "median": 357.1648406982422, "p90": 1019.1304382324221, "max": 1701.868408203125, "pos_frac": 0.75, "sample": [-3.9266128540039062, 16.600162506103516, 1431.676025390625, 298.676025390625, 959.7473754882812, 249.32408142089844, 667.123291015625, -100.62916564941406, -130.62820434570312, 311.3323669433594, 418.75628662109375, 177.4365692138672, 594.152099609375, 1166.42919921875, 358.31121826171875, 819.2947387695312, -85.89620971679688, 69.73529815673828, -518.4766845703125, -55.4921875, 556.7947998046875, -114.2439193725586, 522.4039306640625, 367.0755310058594, 66.19847106933594, -268.560546875, 60.54592514038086, 657.5769653320312, 1701.868408203125, 234.20909118652344, 457.03277587890625, 616.5911254882812, -374.5455627441406, 1044.580322265625, 702.767578125, 12.511089324951172, 1168.6912841796875, 456.8663024902344, 356.0184631347656, 87.4302749633789, 583.6784057617188, 820.9638671875, 217.76214599609375, 414.69024658203125, 204.75331115722656, 410.8927307128906, 573.2548217773438, 1088.165283203125, -129.20175170898438, -64.1867446899414, -22.057912826538086, 446.6341552734375, 484.5736389160156, 23.248205184936523, 413.0977783203125, 593.0055541992188, -57.26325988769531, 449.4270324707031, -9.73746109008789, -74.55631256103516, -494.66973876953125, 541.998291015625, 259.3609619140625, 1178.8206787109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000488.npy"}
|
|
{"epoch": 0.7165932452276065, "step": 489, "batch_size": 64, "mean": 342.5459289550781, "std": 502.50189208984375, "min": -482.5206298828125, "p10": -155.48104171752925, "median": 190.9325714111328, "p90": 1092.5627075195314, "max": 1906.3333740234375, "pos_frac": 0.71875, "sample": [756.695068359375, 2.9370269775390625, 221.19786071777344, 82.76365661621094, 54.482261657714844, 147.59652709960938, 179.7167205810547, -169.05099487304688, 101.79139709472656, 126.165771484375, -235.67832946777344, 247.57150268554688, 564.756591796875, 826.1371459960938, 406.5967712402344, 124.49491882324219, -172.8668975830078, 207.875732421875, -123.81781768798828, 830.301513671875, -37.14433288574219, -112.37655639648438, 202.14842224121094, 249.95281982421875, -39.75082015991211, 149.03721618652344, 119.18498229980469, -41.759029388427734, 179.11610412597656, 538.209228515625, -92.3350830078125, 49.60432434082031, -10.160097122192383, -205.05157470703125, 1096.337646484375, 683.8850708007812, 57.9334831237793, 105.52006530761719, 333.1805419921875, -482.5206298828125, 652.8222045898438, -355.8901672363281, 1275.4697265625, -433.3541259765625, 1200.5908203125, 1083.7545166015625, 648.854248046875, 1906.3333740234375, 317.042724609375, 617.8529052734375, 1666.7764892578125, 622.8896484375, 537.19580078125, 1374.265869140625, -45.439292907714844, -78.54642486572266, 641.9573974609375, 506.6316223144531, 540.5135498046875, 394.8804931640625, -1.318817138671875, 1400.8240966796875, 581.6915283203125, -55.53619384765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000489.npy"}
|
|
{"epoch": 0.7180616740088106, "step": 490, "batch_size": 64, "mean": 373.4256896972656, "std": 644.287109375, "min": -1180.908935546875, "p10": -434.34369812011715, "median": 204.953369140625, "p90": 1231.4268310546877, "max": 1994.563720703125, "pos_frac": 0.796875, "sample": [74.37460327148438, -1057.9771728515625, 1182.133544921875, -175.14207458496094, 148.89666748046875, -24.691856384277344, 148.1727294921875, 899.7031860351562, 732.1903686523438, -571.6705932617188, 120.12786865234375, -60.263153076171875, 1246.521240234375, 95.79855346679688, 582.9515380859375, -458.4750671386719, -210.29714965820312, 1196.20654296875, 678.4187622070312, -643.7344970703125, 39.50016403198242, 143.21505737304688, -775.7786865234375, 102.49305725097656, -1180.908935546875, 1281.4124755859375, 138.65179443359375, 548.5217895507812, 482.4374084472656, 803.0699462890625, 337.39483642578125, 1994.563720703125, 158.42816162109375, 642.4176025390625, 86.87657928466797, -122.93897247314453, 1699.8477783203125, 792.5833129882812, 1585.00048828125, 86.93241882324219, 127.46600341796875, 226.42324829101562, 1269.4368896484375, 635.9185791015625, 141.66783142089844, 752.7505493164062, -378.03717041015625, 838.95458984375, 44.635009765625, 489.90411376953125, 377.964599609375, -516.003662109375, 1726.3824462890625, 21.049072265625, 1104.059814453125, 918.361572265625, 186.28314208984375, 150.07489013671875, 607.841796875, 862.07177734375, 58.31843948364258, 751.1953125, 531.9378051757812, 223.62359619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000490.npy"}
|
|
{"epoch": 0.7195301027900147, "step": 491, "batch_size": 64, "mean": 253.20948791503906, "std": 481.7109375, "min": -792.7244873046875, "p10": -283.0273498535156, "median": 180.76647186279297, "p90": 864.332397460938, "max": 1569.5234375, "pos_frac": 0.71875, "sample": [314.95672607421875, -286.11639404296875, 744.0316772460938, 524.4696044921875, 1451.6749267578125, 186.7823028564453, 286.1106872558594, -114.1260986328125, 458.5574645996094, 83.50128173828125, 288.8348693847656, 91.15071105957031, 147.78903198242188, -492.2171630859375, 568.1517333984375, 113.38606262207031, 561.6845703125, 533.7354125976562, -83.20591735839844, 495.2585754394531, 174.75064086914062, -18.11989402770996, 921.956298828125, -495.2614440917969, 234.13807678222656, 240.3601531982422, 713.9410400390625, -274.5621337890625, 912.281494140625, 13.263116836547852, 1258.1839599609375, 362.5412292480469, -792.7244873046875, 146.502197265625, -210.57801818847656, 276.1216735839844, -10.229377746582031, 1090.4752197265625, 244.35145568847656, 390.36676025390625, 667.81689453125, -582.3617553710938, 1569.5234375, 42.036529541015625, -193.1624298095703, 174.42568969726562, -275.819580078125, 223.1887664794922, -55.48180389404297, 570.7061157226562, 78.91300964355469, -371.55084228515625, 666.0263061523438, -82.34327697753906, 24.0969181060791, 82.95262145996094, 349.4761962890625, 752.451171875, 117.69651794433594, -48.038482666015625, 1354.9771728515625, 146.41180419921875, 452.91162109375, -511.6139221191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000491.npy"}
|
|
{"epoch": 0.7209985315712188, "step": 492, "batch_size": 64, "mean": 255.55410766601562, "std": 566.0716552734375, "min": -943.8262329101562, "p10": -416.1080047607422, "median": 159.55291748046875, "p90": 1094.114978027344, "max": 1394.915283203125, "pos_frac": 0.734375, "sample": [148.875732421875, 313.35052490234375, 554.08251953125, 26.1079044342041, 241.6373291015625, 59.38871765136719, 15.972991943359375, 736.5542602539062, -549.3599853515625, -152.2644500732422, 156.35403442382812, 706.806884765625, 607.1490478515625, -229.0855712890625, -943.8262329101562, 1002.3298950195312, -81.3078384399414, 1238.2706298828125, -199.10617065429688, 1325.4482421875, -428.25689697265625, 1394.915283203125, 349.9432678222656, -340.4170227050781, 353.953857421875, 369.3818359375, 240.76348876953125, -546.3471069335938, 78.58781433105469, 325.3234558105469, 10.25469970703125, 32.56855010986328, 162.75180053710938, -934.52490234375, -130.55709838867188, 1236.1622314453125, 869.9781494140625, 282.1322021484375, -721.1201171875, 148.7752685546875, 1012.353515625, -721.1322021484375, 1216.7330322265625, 22.61626434326172, -148.67703247070312, 798.5437622070312, 214.90284729003906, 82.75534057617188, 1054.741943359375, 51.427669525146484, 496.5521545410156, -161.09605407714844, 241.40203857421875, -387.7605895996094, 1110.9891357421875, 903.9822998046875, 625.4983520507812, 16.413619995117188, 75.32908630371094, 583.128662109375, -268.5258483886719, 1372.6890869140625, 45.72038269042969, 385.2271728515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000492.npy"}
|
|
{"epoch": 0.7224669603524229, "step": 493, "batch_size": 64, "mean": 275.22186279296875, "std": 521.0993041992188, "min": -1843.9498291015625, "p10": -209.9158493041992, "median": 189.7177734375, "p90": 1068.7117919921875, "max": 1294.2686767578125, "pos_frac": 0.734375, "sample": [1072.45263671875, 107.20235443115234, 529.5760498046875, 226.32049560546875, -93.43276977539062, 1287.315185546875, 885.6181640625, -18.530426025390625, 125.51619720458984, -394.30108642578125, 441.96075439453125, 99.23030090332031, 1026.65625, 505.70501708984375, 383.85247802734375, 146.23959350585938, -267.7734069824219, 360.8025207519531, 146.54867553710938, 42.4156494140625, 157.51036071777344, 233.66732788085938, 486.77679443359375, 386.70794677734375, -154.61370849609375, 421.51824951171875, 647.8721923828125, -236.91094970703125, -186.93780517578125, -107.4268569946289, 60.57623291015625, -1843.9498291015625, 320.499755859375, 26.292007446289062, 710.32470703125, 27.759302139282227, 1284.2060546875, 1294.2686767578125, 122.12938690185547, -214.8412628173828, -53.796417236328125, 337.97100830078125, 1176.387451171875, 815.33984375, 325.5148620605469, 1059.983154296875, -461.9809875488281, 334.7149353027344, 1256.696533203125, 1096.109375, 95.42903900146484, 607.6162109375, -243.8456268310547, 206.39926147460938, 217.5909423828125, -198.4232177734375, -103.75171661376953, 223.8532257080078, 651.865478515625, 173.03628540039062, 145.75933837890625, 27.3712158203125, -82.82931518554688, -41.61454772949219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000493.npy"}
|
|
{"epoch": 0.723935389133627, "step": 494, "batch_size": 64, "mean": 435.43121337890625, "std": 482.89764404296875, "min": -612.517578125, "p10": -109.63961181640619, "median": 408.5062713623047, "p90": 1044.1489074707033, "max": 1791.2457275390625, "pos_frac": 0.828125, "sample": [-2.420145034790039, 344.6048583984375, 597.0040893554688, 395.25494384765625, 1791.2457275390625, 1302.641357421875, -612.517578125, 726.3850708007812, 682.5365600585938, 64.02740478515625, 471.650390625, -28.185993194580078, 825.0672607421875, 319.66033935546875, -291.3826904296875, 1134.071044921875, 709.7108154296875, -220.44830322265625, 457.7825622558594, 818.38623046875, 125.33466339111328, -414.0092468261719, 269.6446838378906, 962.4345092773438, 851.927490234375, 63.531837463378906, 784.3275146484375, 1105.5712890625, 234.27687072753906, 358.2021179199219, -493.7176513671875, 67.45957946777344, 310.6734313964844, -253.67596435546875, 1704.3533935546875, 411.5523986816406, 441.0997314453125, 43.084007263183594, 431.69427490234375, 1200.69189453125, 1001.5917358398438, 492.245849609375, -49.37593078613281, 14.824516296386719, 494.28680419921875, 640.3612060546875, 727.7086181640625, 902.0469970703125, 359.28564453125, 683.2594604492188, -135.46690368652344, 122.87037658691406, 532.03857421875, 155.58847045898438, 138.68930053710938, 496.3116455078125, 848.8765258789062, 677.9755249023438, 226.21197509765625, 405.46014404296875, 1062.3876953125, -17.944416046142578, 346.8535461425781, 51.97998046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000494.npy"}
|
|
{"epoch": 0.7254038179148311, "step": 495, "batch_size": 64, "mean": 386.12274169921875, "std": 575.3947143554688, "min": -989.9065551757812, "p10": -207.7904342651367, "median": 307.53712463378906, "p90": 1076.8425781250003, "max": 2372.215087890625, "pos_frac": 0.78125, "sample": [359.6099853515625, 59.73030090332031, -76.29901123046875, -83.266357421875, 164.5794677734375, 418.662109375, -526.5121459960938, 216.568603515625, 268.3909912109375, 30.97674560546875, 162.9198455810547, 718.4224853515625, 906.3782348632812, -550.9490966796875, 380.69500732421875, 427.3235778808594, -278.0968933105469, 824.0001831054688, 80.60101318359375, 682.118896484375, 158.5061492919922, -31.368560791015625, 268.6762390136719, 82.410400390625, 292.5986022949219, 986.529052734375, 945.802490234375, 706.6759643554688, 266.5223693847656, 570.9691162109375, 270.19525146484375, 328.52337646484375, 145.89596557617188, 448.36602783203125, 1174.548095703125, 1101.02978515625, 776.2803955078125, -214.81617736816406, 2372.215087890625, 1353.271484375, 399.11590576171875, -40.85844421386719, 135.67758178710938, 651.9705200195312, 367.18365478515625, -607.8460693359375, 18.459457397460938, 408.8873291015625, 56.98387908935547, -324.6047058105469, -191.39703369140625, -8.481292724609375, 372.72625732421875, 614.4326171875, 1396.203857421875, 647.4580078125, 290.3164978027344, 844.3603515625, -989.9065551757812, 1235.2698974609375, 1020.40576171875, 322.47564697265625, 1985.79248046875, -81.45406341552734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000495.npy"}
|
|
{"epoch": 0.7268722466960352, "step": 496, "batch_size": 64, "mean": 203.84725952148438, "std": 449.3397216796875, "min": -1006.80224609375, "p10": -377.0470611572265, "median": 157.57869720458984, "p90": 849.9583923339844, "max": 1185.262451171875, "pos_frac": 0.765625, "sample": [104.01425170898438, -476.571533203125, 938.9752197265625, 932.2733154296875, -709.8440551757812, 699.7429809570312, 148.11972045898438, 273.28497314453125, 853.6818237304688, 160.99624633789062, 1185.262451171875, 301.89453125, 428.6710510253906, 249.03912353515625, -262.8641357421875, -8.045589447021484, -215.9202880859375, 434.31256103515625, -761.9505004882812, 1114.949462890625, 805.603759765625, 320.3059387207031, 841.2703857421875, 442.18865966796875, 896.1508178710938, 242.5457763671875, -112.40281677246094, -22.533767700195312, 75.95231628417969, -412.3987121582031, 207.8211669921875, 208.06016540527344, 137.58334350585938, 438.6634216308594, 94.92028045654297, -103.31022644042969, 449.80859375, 128.31771850585938, -626.1231079101562, 261.16180419921875, -433.548095703125, -294.55987548828125, 697.2352905273438, 24.630470275878906, 154.16114807128906, 526.2577514648438, 120.32783508300781, 435.2594909667969, 571.9788818359375, 478.1153259277344, 90.75503540039062, -175.7627410888672, 412.37469482421875, 37.46788787841797, 166.27639770507812, -1006.80224609375, 57.29570388793945, 971.9671630859375, 31.269676208496094, 0.9439468383789062, 383.1067199707031, 33.80189895629883, 46.06853485107422, 53.99700927734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000496.npy"}
|
|
{"epoch": 0.7283406754772394, "step": 497, "batch_size": 64, "mean": 207.31849670410156, "std": 483.5421142578125, "min": -937.5123901367188, "p10": -335.73664855957026, "median": 177.72314453125, "p90": 912.5368469238285, "max": 1389.3687744140625, "pos_frac": 0.671875, "sample": [-360.7436828613281, 955.7180786132812, 651.4623413085938, 680.3873901367188, 10.170886993408203, -277.38690185546875, 489.178466796875, -186.85206604003906, -113.20576477050781, -243.9868927001953, 232.0671844482422, 1035.7484130859375, 1189.203857421875, 400.98223876953125, 47.347923278808594, 208.5892333984375, 395.1716003417969, 214.3456268310547, 696.7548828125, 456.1424255371094, -102.14024353027344, -57.82329559326172, 598.8746337890625, 965.675048828125, -38.71366882324219, 10.874876022338867, 820.760498046875, -122.24586486816406, 76.65025329589844, -669.4885864257812, 466.0606689453125, 89.11927795410156, 7.688434600830078, 259.004150390625, 58.56314468383789, 1312.0255126953125, -32.05035400390625, 1389.3687744140625, 43.18638610839844, 77.53799438476562, 165.68118286132812, 293.7213439941406, 338.37274169921875, 668.8145751953125, -937.5123901367188, 189.76510620117188, 593.3384399414062, -441.1308898925781, 330.7384948730469, 123.58282470703125, 350.1750793457031, 269.28619384765625, 951.8695678710938, -272.9680480957031, -112.27037811279297, 237.75128173828125, -156.74241638183594, -389.5157470703125, -97.61665344238281, -871.87353515625, 686.616943359375, -508.54339599609375, 339.6767883300781, -116.85581970214844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000497.npy"}
|
|
{"epoch": 0.7298091042584435, "step": 498, "batch_size": 64, "mean": 347.5252685546875, "std": 589.2314453125, "min": -1304.3675537109375, "p10": -288.0455276489257, "median": 372.3294372558594, "p90": 1039.7615112304688, "max": 1937.865234375, "pos_frac": 0.71875, "sample": [-812.986572265625, 208.77822875976562, 665.6172485351562, 213.30618286132812, -172.34097290039062, 692.6446533203125, 652.2651977539062, 1052.4422607421875, 405.3403625488281, 743.8577880859375, 511.5296630859375, 800.0056762695312, 268.9856872558594, 918.1334838867188, -1304.3675537109375, 685.6394653320312, 986.9108276367188, 1379.925048828125, -84.30083465576172, 566.0845336914062, 116.62194061279297, 678.81884765625, -126.67291259765625, 1010.173095703125, -233.11111450195312, 374.1026916503906, 393.44775390625, 65.70087432861328, -400.39874267578125, -31.94109344482422, 290.4936218261719, 370.5561828613281, -1066.1405029296875, 1368.5933837890625, 502.1346740722656, 58.2525634765625, -154.28582763671875, 462.2896728515625, 601.5906982421875, 485.7255859375, 1286.818603515625, -26.98442840576172, 48.24566650390625, 13.713085174560547, 555.6968383789062, 580.462890625, -378.2107849121094, -4.155517578125, 1343.822998046875, 940.6829223632812, 252.2407684326172, -310.425048828125, 331.8025817871094, -196.84735107421875, -235.82664489746094, 16.824737548828125, 481.34832763671875, 158.337646484375, 935.7325439453125, 617.478759765625, -339.4103088378906, 1242.3460693359375, 1937.865234375, -153.36474609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000498.npy"}
|
|
{"epoch": 0.7312775330396476, "step": 499, "batch_size": 64, "mean": 423.11358642578125, "std": 572.0934448242188, "min": -1022.9998168945312, "p10": -182.92580261230466, "median": 451.3472900390625, "p90": 1073.7696777343754, "max": 2244.95849609375, "pos_frac": 0.765625, "sample": [1105.730224609375, 95.56343078613281, 199.98355102539062, -1022.9998168945312, -77.89507293701172, 480.1156921386719, 559.9456787109375, 648.466552734375, -55.94495391845703, -91.91421508789062, 508.6311950683594, 529.4198608398438, 106.69152069091797, 999.195068359375, 555.5410766601562, 174.19064331054688, 720.471435546875, -658.2885131835938, 2244.95849609375, 288.315185546875, 223.30349731445312, 980.4031372070312, 697.144775390625, 27.813621520996094, -646.5299682617188, 423.2699279785156, 281.60260009765625, 986.096435546875, -244.9671173095703, 1313.3453369140625, 253.87789916992188, 15.67724609375, 556.2913208007812, -124.16251373291016, 295.8830871582031, -23.514150619506836, 583.6326293945312, 934.636474609375, 938.1625366210938, -231.4430389404297, 35.17366027832031, 665.3421020507812, 661.48828125, 605.6458129882812, 661.354248046875, 156.08612060546875, 703.0739135742188, 1721.34130859375, 1379.0164794921875, 108.63089752197266, 454.0311584472656, 770.5777587890625, 448.6634216308594, 821.9290771484375, -75.78280639648438, 813.987060546875, 1318.4217529296875, 347.7518615722656, -554.414794921875, -157.6339874267578, -193.76515197753906, 605.9395751953125, -17.72184181213379, 1249.433837890625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000499.npy"}
|
|
{"epoch": 0.7327459618208517, "step": 500, "batch_size": 64, "mean": 352.2098388671875, "std": 575.6246337890625, "min": -1175.8612060546875, "p10": -440.3055755615233, "median": 285.41009521484375, "p90": 952.8133300781251, "max": 1780.4996337890625, "pos_frac": 0.796875, "sample": [64.88278198242188, -216.2469940185547, -673.5013427734375, 1780.4996337890625, -595.2086791992188, 545.371826171875, 967.0838012695312, 510.0058288574219, 841.5845336914062, 121.08458709716797, 811.2130126953125, 726.62451171875, 766.387939453125, -490.6368103027344, 5.534696578979492, 520.6529541015625, 37.39678955078125, 69.986572265625, 303.64129638671875, 1399.922607421875, 643.5704345703125, -62.72428894042969, 745.963134765625, 1412.294677734375, -10.091224670410156, 765.9459228515625, -490.49078369140625, 874.9064331054688, 919.5155639648438, -766.3357543945312, 857.6279296875, 816.27294921875, -82.64387512207031, 157.23143005371094, 52.93500518798828, 831.41552734375, -674.5201416015625, 128.83090209960938, 1128.509765625, -1175.8612060546875, -182.0343780517578, 50.2449836730957, 186.35595703125, 692.016357421875, 91.8411636352539, 21.6290225982666, 631.001220703125, 82.16268157958984, 128.294921875, 267.17889404296875, 1226.308837890625, 796.0176391601562, 833.4451904296875, 180.0465087890625, 609.5564575195312, 491.6852111816406, 708.1845703125, 306.4617919921875, 25.95210838317871, 690.3529052734375, 70.83454895019531, 1256.47705078125, -323.2067565917969, 131.9900665283203], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000500.npy"}
|
|
{"epoch": 0.7342143906020558, "step": 501, "batch_size": 64, "mean": 373.15838623046875, "std": 592.7010498046875, "min": -931.5608520507812, "p10": -328.3506561279296, "median": 336.7568054199219, "p90": 1220.4635498046878, "max": 1953.343994140625, "pos_frac": 0.75, "sample": [-133.11355590820312, 384.7838134765625, 299.65570068359375, -413.6865539550781, -25.15158462524414, 1405.93212890625, 237.04466247558594, 943.142578125, 256.8671875, 102.66952514648438, 197.03536987304688, -179.57630920410156, 268.46453857421875, 125.40824890136719, 1953.343994140625, 744.547607421875, 1018.41796875, 407.114013671875, 447.0067443847656, 955.1304321289062, 1245.474365234375, -107.61015319824219, 295.96795654296875, 198.38323974609375, 893.1058349609375, -492.5455627441406, -642.4938354492188, 441.05072021484375, 1054.028564453125, 554.6538696289062, -825.6250610351562, 228.88250732421875, 547.8641967773438, 366.4879150390625, 421.088134765625, 1395.1737060546875, -257.71002197265625, 507.47235107421875, 359.0526428222656, 942.8218383789062, 1252.6531982421875, 12.677970886230469, -180.7509002685547, -355.8323059082031, 696.2996215820312, -18.50103759765625, 73.76878356933594, 533.79541015625, 68.59292602539062, -931.5608520507812, 314.4609680175781, 548.2860107421875, -264.226806640625, -100.57272338867188, 63.889530181884766, -481.983642578125, 1162.10498046875, 742.0913696289062, 1304.2255859375, 488.37841796875, 1813.69677734375, 556.760986328125, 449.197021484375, 14.125553131103516], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000501.npy"}
|
|
{"epoch": 0.73568281938326, "step": 502, "batch_size": 64, "mean": 381.4633483886719, "std": 482.5098571777344, "min": -565.3206176757812, "p10": -77.87691879272457, "median": 312.5458221435547, "p90": 1015.9113464355471, "max": 2566.7119140625, "pos_frac": 0.859375, "sample": [613.032470703125, -327.13092041015625, 973.4894409179688, 393.9052429199219, 43.8115234375, 709.4496459960938, -141.0347900390625, 9.752685546875, 1302.7454833984375, 365.399658203125, 387.021728515625, 268.8642272949219, 299.7012939453125, 1074.155029296875, 945.6868286132812, 20.2947998046875, 37.58111572265625, 309.6842346191406, -565.3206176757812, 70.71112060546875, 692.9794311523438, 1034.0921630859375, 97.16936492919922, 424.3417053222656, 35.22148132324219, 199.41880798339844, 73.61921691894531, 339.0947570800781, 465.530517578125, 317.3236083984375, -238.76707458496094, 1111.02197265625, 20.75444793701172, 550.3917236328125, 441.6796875, 878.4682006835938, -93.82667541503906, 2566.7119140625, 14.171920776367188, 1.4876575469970703, 786.2529907226562, 36.50205993652344, 277.78857421875, 308.5794677734375, 399.1468505859375, 569.1595458984375, 1161.58740234375, 38.53296661376953, 645.677978515625, 451.23114013671875, 787.4542236328125, -3.0455474853515625, -40.66082000732422, 115.06951904296875, 1084.8887939453125, -133.53993225097656, 315.40740966796875, 83.93521118164062, 725.280517578125, 302.0609130859375, -132.71963500976562, 362.9072265625, 195.85511779785156, 353.6180419921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000502.npy"}
|
|
{"epoch": 0.737151248164464, "step": 503, "batch_size": 64, "mean": 343.6672668457031, "std": 466.2494201660156, "min": -353.9173583984375, "p10": -139.0587936401367, "median": 243.3192367553711, "p90": 830.9392395019531, "max": 2142.075439453125, "pos_frac": 0.75, "sample": [1.6187248229980469, -66.31269073486328, 1164.48291015625, 11.065963745117188, -151.40736389160156, -197.57196044921875, 455.18878173828125, 230.02406311035156, 85.93689727783203, -66.37801361083984, -353.9173583984375, 347.85101318359375, -182.81405639648438, 268.40313720703125, 996.9057006835938, 818.7616577148438, 337.6207275390625, 804.8275146484375, 73.79547119140625, 780.996337890625, -33.51490783691406, -42.47966384887695, 80.61625671386719, 33.50708770751953, 382.81060791015625, -129.73497009277344, -105.44097137451172, 530.8133544921875, 198.38601684570312, 134.13287353515625, 520.1304931640625, -63.4295654296875, 113.8502197265625, 679.8550415039062, 23.862091064453125, -236.61761474609375, 2142.075439453125, 1773.8031005859375, 548.1015625, 378.4029541015625, 105.24386596679688, 691.01708984375, 756.6033935546875, 26.293142318725586, -77.78179931640625, 658.743408203125, 150.33201599121094, 816.9479370117188, 242.00924682617188, 385.14093017578125, 174.34719848632812, 633.1572265625, 447.374267578125, 244.6292266845703, 1056.08642578125, 541.8155517578125, -143.05471801757812, 764.585693359375, -200.77706909179688, 836.158203125, 287.667236328125, 462.91241455078125, 857.2793579101562, -10.231781005859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000503.npy"}
|
|
{"epoch": 0.7386196769456681, "step": 504, "batch_size": 64, "mean": 414.3384094238281, "std": 563.3538208007812, "min": -676.8782958984375, "p10": -331.8172622680663, "median": 343.8470916748047, "p90": 1116.7745727539063, "max": 1789.207763671875, "pos_frac": 0.78125, "sample": [-15.073883056640625, 138.43646240234375, 897.8919067382812, 414.3507080078125, 133.6667022705078, 335.1185607910156, 657.9771118164062, -237.42303466796875, 1683.882080078125, 1365.2425537109375, 1789.207763671875, -594.3264770507812, 605.8585205078125, 751.095703125, -47.09532165527344, -94.00837707519531, 673.5189208984375, 211.28311157226562, -6.016731262207031, 52.175445556640625, 26.74590301513672, 1.1628036499023438, 352.57562255859375, -246.5740509033203, 239.1813507080078, 246.0415802001953, -523.6599731445312, 530.3082885742188, 181.88540649414062, 425.87506103515625, -402.6255187988281, 1095.28076171875, -368.3500671386719, 1347.58154296875, 812.72900390625, 670.2168579101562, 785.2550048828125, 579.2098999023438, 261.6435241699219, 325.9727478027344, 83.75640869140625, 1268.3458251953125, 874.2311401367188, 507.1394348144531, 918.6211547851562, -155.44845581054688, -542.1490478515625, 866.7579956054688, 53.46659851074219, 947.549072265625, 206.35946655273438, 717.6491088867188, 1125.9862060546875, 732.572021484375, 308.01409912109375, 285.17315673828125, -382.11212158203125, 991.7693481445312, 1677.90576171875, 362.4380798339844, -676.8782958984375, 561.72314453125, 722.4896240234375, 6.0806884765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000504.npy"}
|
|
{"epoch": 0.7400881057268722, "step": 505, "batch_size": 64, "mean": 219.666259765625, "std": 460.5285949707031, "min": -777.8680419921875, "p10": -284.54921722412104, "median": 166.59014129638672, "p90": 907.0903930664062, "max": 1370.2735595703125, "pos_frac": 0.6875, "sample": [14.621269226074219, 514.8759765625, -307.57763671875, 636.1805419921875, -48.64283752441406, 925.4527587890625, 56.30189514160156, -36.81492614746094, 46.893550872802734, 24.455215454101562, 777.5480346679688, 904.1351318359375, 1370.2735595703125, -16.1722412109375, 575.0384521484375, 472.73565673828125, -162.01438903808594, 391.45245361328125, 136.17535400390625, -230.8162384033203, 346.7716369628906, 579.7442626953125, 184.885498046875, 83.74298095703125, -539.0060424804688, -688.6915893554688, 196.04017639160156, 279.7239074707031, 1056.360107421875, 166.53936767578125, -86.14649963378906, 434.1910400390625, 400.3531494140625, -228.91000366210938, 238.21194458007812, 720.1148681640625, 51.901161193847656, 712.6011962890625, -101.8447494506836, 555.4584350585938, -732.9978637695312, -777.8680419921875, 162.9603729248047, 185.6011505126953, 14.4169921875, 57.80565643310547, 967.0282592773438, 479.4881896972656, -225.08033752441406, 963.313232421875, 166.6409149169922, -190.2027587890625, -359.87481689453125, 892.2620849609375, 195.2247772216797, -79.55601501464844, 908.55322265625, -489.1580810546875, -126.098876953125, 46.23158264160156, 908.35693359375, 228.160400390625, 493.5923156738281, -36.30088424682617], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000505.npy"}
|
|
{"epoch": 0.7415565345080763, "step": 506, "batch_size": 64, "mean": 368.9680480957031, "std": 519.1372680664062, "min": -799.6482543945312, "p10": -187.8771102905273, "median": 332.9208068847656, "p90": 1077.28134765625, "max": 2015.00537109375, "pos_frac": 0.75, "sample": [-443.43157958984375, 38.632843017578125, 738.8993530273438, -4.9699249267578125, 234.19747924804688, 325.958740234375, -201.07022094726562, 567.5352172851562, 822.2042846679688, 48.216365814208984, 519.2648315429688, 261.9244384765625, 289.06744384765625, 153.30751037597656, -733.001708984375, 1085.621337890625, 477.39349365234375, -2.7162628173828125, 339.88287353515625, 699.0321655273438, 499.3658447265625, 1370.5225830078125, 344.74468994140625, 113.34691619873047, 2015.00537109375, -154.41770935058594, -279.47564697265625, 604.2655029296875, 728.2905883789062, 261.5677490234375, 436.442626953125, 1230.2445068359375, 424.6042175292969, 104.351318359375, 586.5010986328125, -631.8640747070312, 351.61322021484375, 161.97509765625, 889.727783203125, 905.82421875, 463.40850830078125, 715.9257202148438, -157.0931854248047, 460.98870849609375, 96.14737701416016, -799.6482543945312, 267.6056213378906, 3.4696273803710938, 1110.3482666015625, 1031.3629150390625, 1095.4593505859375, -4.150108337402344, 324.7742919921875, 807.8313598632812, 732.0243530273438, 1074.840576171875, -38.83802795410156, 1078.327392578125, 191.25868225097656, -40.401222229003906, -323.82086181640625, -94.90713500976562, 529.8372802734375, -89.3821792602539], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000506.npy"}
|
|
{"epoch": 0.7430249632892805, "step": 507, "batch_size": 64, "mean": 315.6078186035156, "std": 588.3258056640625, "min": -945.0108642578125, "p10": -350.09112854003905, "median": 280.380126953125, "p90": 881.5725952148439, "max": 2295.4765625, "pos_frac": 0.734375, "sample": [6.855581283569336, 408.79022216796875, -34.27613830566406, -945.0108642578125, 1238.9844970703125, -111.560302734375, 62.45805358886719, 416.4222717285156, 35.772804260253906, 227.12579345703125, -54.31129455566406, 429.2716979980469, 52.58229064941406, 471.18963623046875, 649.120361328125, 758.6508178710938, -452.25128173828125, 298.435302734375, 185.4140167236328, 2073.233154296875, 473.7509765625, 639.3067016601562, 68.06256103515625, -196.14633178710938, 506.93231201171875, 321.1837158203125, 647.2958984375, 892.5921020507812, -387.892333984375, 412.1116943359375, 269.20159912109375, -62.759857177734375, -334.05609130859375, -102.43443298339844, 17.855504989624023, 574.185302734375, 620.2283935546875, 1239.670166015625, 647.7984008789062, 68.27410125732422, 283.73016357421875, 20.910751342773438, -42.6002197265625, 364.1888732910156, 2295.4765625, 855.8604125976562, 163.40777587890625, 342.97491455078125, -423.4491882324219, 715.8385620117188, 780.86083984375, -587.8284912109375, -34.66767883300781, 717.2364501953125, 1816.8685302734375, 640.3504028320312, 905.6522216796875, 438.28533935546875, 277.03009033203125, 4.302953720092773, 30.59545135498047, -195.41781616210938, -356.9632873535156, -845.7997436523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000507.npy"}
|
|
{"epoch": 0.7444933920704846, "step": 508, "batch_size": 64, "mean": 269.07879638671875, "std": 625.8920288085938, "min": -1553.1361083984375, "p10": -388.6345001220703, "median": 218.46559143066406, "p90": 974.528210449219, "max": 2132.926513671875, "pos_frac": 0.6875, "sample": [168.47314453125, 164.32708740234375, -663.5458374023438, 681.5845947265625, 537.9393310546875, -12.9498291015625, 128.94607543945312, 1950.65966796875, 395.8139343261719, 744.2454223632812, 456.5740661621094, 1001.5711669921875, 319.1713562011719, 2132.926513671875, 572.3746337890625, 320.75323486328125, 195.26565551757812, -710.38916015625, 1213.1243896484375, 911.427978515625, -524.0341796875, -200.95323181152344, 327.81988525390625, -1104.4716796875, 1025.3231201171875, -200.47225952148438, 125.78607177734375, -241.15505981445312, -110.28202819824219, 346.767578125, -75.57496643066406, 346.6922607421875, 4.7346343994140625, -30.14990997314453, 1752.688232421875, -1553.1361083984375, 115.32238006591797, 132.8158416748047, 187.29217529296875, 791.51416015625, 336.2900085449219, 106.00910186767578, 109.65338134765625, 462.28204345703125, 648.9807739257812, 241.66552734375, 530.21142578125, 557.855224609375, 758.3477172851562, 558.0366821289062, 622.1187133789062, -309.8487243652344, -283.6678466796875, 1085.11962890625, 579.0191650390625, -248.75439453125, -115.1904296875, -47.192138671875, -416.28582763671875, 676.8687133789062, -349.0843505859375, 384.63665771484375, 114.7363052368164, -405.5845642089844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000508.npy"}
|
|
{"epoch": 0.7459618208516887, "step": 509, "batch_size": 64, "mean": 298.84124755859375, "std": 463.45233154296875, "min": -608.7313842773438, "p10": -303.9815216064453, "median": 244.83094024658203, "p90": 974.8138122558595, "max": 1632.935546875, "pos_frac": 0.78125, "sample": [-17.99555015563965, -74.83901977539062, -608.7313842773438, 981.2359619140625, -120.0833740234375, 528.0187377929688, 61.46332550048828, 88.45423126220703, 507.97845458984375, 320.7329406738281, 680.9926147460938, 133.61419677734375, 296.58526611328125, 102.29568481445312, 168.83949279785156, 253.6924285888672, 950.3187866210938, -476.09039306640625, -467.016357421875, 402.1681823730469, 1114.902587890625, 452.9515686035156, 148.6292266845703, 259.24920654296875, 394.579345703125, 722.8699340820312, 164.707275390625, 74.13216400146484, 332.8334045410156, 429.6949768066406, 646.7203979492188, -411.6483459472656, 133.82032775878906, -147.4463653564453, 372.1382751464844, 994.3099975585938, 711.5233764648438, 113.17292785644531, 217.33731079101562, -285.10546875, -2.78271484375, 505.04443359375, 332.61383056640625, 89.551513671875, 47.37205505371094, -580.055908203125, 1191.50390625, 1288.8541259765625, -120.55519104003906, 1632.935546875, -321.6329345703125, 182.58807373046875, -312.0712585449219, 48.164093017578125, 257.5480651855469, 365.08697509765625, 122.44896697998047, 219.67202758789062, 235.96945190429688, 503.88385009765625, 731.6160278320312, 1194.4075927734375, 402.841064453125, 959.8287963867188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000509.npy"}
|
|
{"epoch": 0.7474302496328928, "step": 510, "batch_size": 64, "mean": 386.30255126953125, "std": 428.61993408203125, "min": -518.7579345703125, "p10": -163.43018188476555, "median": 380.16473388671875, "p90": 880.5156494140626, "max": 1440.7720947265625, "pos_frac": 0.78125, "sample": [252.22921752929688, 891.9432983398438, 1246.6268310546875, 435.53900146484375, -191.43209838867188, 811.4847412109375, 759.5302734375, 359.69781494140625, 33.32633972167969, 691.9409790039062, 407.5993957519531, 537.9701538085938, -299.2051086425781, 509.395263671875, 1116.7001953125, 838.7178344726562, 686.03515625, -518.7579345703125, -64.53227996826172, 853.8511352539062, 291.9169006347656, 358.7679443359375, 418.03924560546875, 210.96585083007812, 264.1983947753906, 665.0823974609375, 664.8672485351562, 893.8244018554688, 1051.6395263671875, -283.9373779296875, 818.4248046875, 1440.7720947265625, -98.09237670898438, 109.08721160888672, 740.0092163085938, 49.397743225097656, 281.4331970214844, 1153.8076171875, -300.395263671875, 298.70831298828125, 242.53321838378906, 83.62198638916016, 662.3623046875, 237.9298553466797, 555.2415161132812, -48.811073303222656, 461.9690856933594, 426.608642578125, 64.62089538574219, 400.63165283203125, 758.9124755859375, -85.6004867553711, 95.6243896484375, 769.9480590820312, -82.19099426269531, 533.276611328125, -325.9411926269531, 827.7694091796875, -45.54906463623047, -218.171875, -12.879079818725586, 694.4056396484375, 285.3278503417969, 54.545223236083984], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000510.npy"}
|
|
{"epoch": 0.748898678414097, "step": 511, "batch_size": 64, "mean": 310.4611511230469, "std": 569.7615356445312, "min": -1479.0628662109375, "p10": -161.33842773437493, "median": 217.49087524414062, "p90": 931.8967468261719, "max": 1756.6630859375, "pos_frac": 0.765625, "sample": [-29.79546356201172, 1252.52880859375, 170.36734008789062, 149.8632354736328, 427.68487548828125, 135.45018005371094, 931.3986206054688, 1388.54736328125, 507.56597900390625, 145.65077209472656, 98.34619903564453, 739.9437255859375, 689.7570190429688, 632.261962890625, 83.72035217285156, 1237.231201171875, -1479.0628662109375, 396.28350830078125, -431.5257568359375, 676.1590576171875, 1404.7977294921875, 805.5966186523438, 80.97364807128906, 482.810302734375, 246.22735595703125, 50.44818878173828, -728.3885498046875, 406.55987548828125, -85.85562133789062, 589.6729736328125, 489.91192626953125, 2.1049346923828125, -20.819833755493164, 265.60308837890625, 293.1084899902344, 117.59722137451172, 932.1102294921875, -193.68820190429688, -4.53155517578125, 211.9671630859375, 846.1094970703125, 215.97280883789062, 42.24800491333008, -347.3258361816406, -56.219810485839844, 722.307373046875, 780.7615966796875, 90.57975769042969, -8.462377548217773, 52.76429748535156, -194.33285522460938, 454.6695861816406, 543.21142578125, -1.817117691040039, 219.00894165039062, 434.93206787109375, 222.44320678710938, -11.567802429199219, 1756.6630859375, 93.02668762207031, 448.8449401855469, -1322.427490234375, 137.01541137695312, 1682.525634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000511.npy"}
|
|
{"epoch": 0.750367107195301, "step": 512, "batch_size": 64, "mean": 417.19671630859375, "std": 434.2760925292969, "min": -428.68487548828125, "p10": -91.3902809143066, "median": 372.56101989746094, "p90": 1065.6143310546877, "max": 1751.6025390625, "pos_frac": 0.828125, "sample": [1101.438720703125, 919.274169921875, 726.2068481445312, -268.3819580078125, -110.09019470214844, 18.929290771484375, 1751.6025390625, 273.5244140625, 65.26382446289062, -47.75714874267578, 157.4780731201172, 667.1636962890625, 3.84136962890625, 428.59283447265625, -299.8970031738281, -17.99252700805664, 551.9671630859375, 521.8958740234375, 359.62921142578125, -14.046808242797852, -191.17733764648438, 713.9440307617188, 118.7373046875, 553.4229125976562, 1195.4576416015625, 733.3294067382812, 255.73045349121094, 465.94293212890625, 15.692375183105469, 713.2972412109375, 325.6043701171875, 342.7166748046875, 491.76678466796875, 542.7932739257812, 236.01014709472656, 190.5549774169922, 1105.49658203125, 385.4928283691406, -158.68634033203125, 876.30517578125, 113.37442016601562, 1143.415283203125, 420.3273010253906, 1088.68408203125, 35.43785095214844, -428.68487548828125, 358.86279296875, 35.56938552856445, 250.29148864746094, 391.19818115234375, 1118.9813232421875, -11.096214294433594, 318.3825378417969, 709.535400390625, 309.9640808105469, 499.3794250488281, 745.2223510742188, 176.52984619140625, 431.692138671875, 775.7238159179688, -195.7685089111328, 1011.784912109375, 863.9414672851562, 836.767333984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000512.npy"}
|
|
{"epoch": 0.7518355359765051, "step": 513, "batch_size": 64, "mean": 297.572509765625, "std": 473.8290100097656, "min": -663.374755859375, "p10": -207.22743682861326, "median": 249.83712768554688, "p90": 1042.0314819335938, "max": 1438.91845703125, "pos_frac": 0.71875, "sample": [685.76513671875, 347.9437255859375, 326.92498779296875, 125.73249053955078, 304.7579650878906, 281.8180236816406, 291.62371826171875, 194.66697692871094, -179.38597106933594, 1005.4346923828125, 589.8258666992188, -166.8865966796875, 653.7241821289062, 65.62059783935547, 62.957786560058594, 763.0537719726562, 32.554779052734375, -23.3345947265625, 208.1288604736328, 281.84893798828125, 899.7205810546875, 1127.6591796875, 1276.0843505859375, 211.494384765625, 600.581787109375, 2.8472023010253906, 459.107177734375, 226.31671142578125, 332.5108642578125, -195.78387451171875, 94.74591064453125, 839.0888671875, 551.5897216796875, 423.51629638671875, 313.03369140625, -53.4364013671875, -134.2344207763672, -612.3705444335938, 273.3575439453125, 1438.91845703125, -40.07099914550781, 138.3392333984375, 67.038330078125, -663.374755859375, 617.4157104492188, -22.21380615234375, -397.9588623046875, -56.62737274169922, 1066.749755859375, 834.8319091796875, -539.8618774414062, -1.9435539245605469, 1057.7158203125, 1167.5294189453125, 323.43359375, 173.49014282226562, -364.43170166015625, 640.421875, -108.64237213134766, -212.13182067871094, -317.71746826171875, 1223.96826171875, 185.6748046875, 345.482666015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000513.npy"}
|
|
{"epoch": 0.7533039647577092, "step": 514, "batch_size": 64, "mean": 295.4927673339844, "std": 490.9858093261719, "min": -797.73486328125, "p10": -184.60428161621093, "median": 199.8984375, "p90": 913.1573242187501, "max": 1847.56689453125, "pos_frac": 0.703125, "sample": [-424.07354736328125, -145.47537231445312, 116.85090637207031, -71.56608581542969, 1080.596923828125, 223.81265258789062, 625.4901123046875, 590.5390014648438, 1847.56689453125, 917.1830444335938, 147.60142517089844, 624.6970825195312, -124.48218536376953, 33.80743408203125, 319.90936279296875, -108.64097595214844, -261.4247741699219, 188.39993286132812, 1167.7342529296875, 1125.773193359375, -149.39395141601562, -80.99250030517578, 339.5162048339844, -101.6094970703125, 34.709205627441406, 298.7516784667969, 284.62164306640625, 119.81625366210938, 454.29998779296875, -110.09009552001953, 892.0993041992188, 543.9781494140625, -193.867431640625, -42.93804931640625, 145.47177124023438, 283.60064697265625, 1149.616455078125, -162.99026489257812, 34.54845428466797, -244.92803955078125, 345.406982421875, -421.953125, -87.56647491455078, 462.47235107421875, 246.72396850585938, 105.82444763183594, -797.73486328125, -131.9344024658203, -262.47552490234375, 891.2322998046875, 211.39694213867188, 472.98236083984375, 436.73553466796875, 714.3936767578125, 903.7639770507812, 147.5899658203125, 304.70599365234375, 670.7545166015625, 721.3763427734375, 89.53153991699219, 746.2730102539062, 41.1331672668457, 138.41616821289062, 1593.96923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000514.npy"}
|
|
{"epoch": 0.7547723935389133, "step": 515, "batch_size": 64, "mean": 225.076171875, "std": 442.7667541503906, "min": -1599.1011962890625, "p10": -245.14678649902342, "median": 225.03799438476562, "p90": 734.3702087402347, "max": 1325.9781494140625, "pos_frac": 0.75, "sample": [-109.33567810058594, -120.82600402832031, 481.5758972167969, 892.9921875, 576.4348754882812, -1599.1011962890625, 484.8462219238281, 429.52728271484375, 152.16790771484375, -571.3133544921875, -265.6177978515625, -11.902603149414062, 8.701904296875, 216.2021484375, 277.87542724609375, 297.6318664550781, 170.1896209716797, 536.2523193359375, -352.29791259765625, 138.59127807617188, 343.3828430175781, -106.23948669433594, 159.8214569091797, 329.45220947265625, 237.33518981933594, 73.58378601074219, 607.8402099609375, -255.821533203125, 506.51116943359375, 38.40348815917969, 772.6082153320312, 29.90433692932129, -220.23904418945312, 645.148193359375, 233.87384033203125, 375.45733642578125, 590.8617553710938, 13.636711120605469, 1199.5853271484375, -183.41049194335938, 162.2756805419922, 988.2586059570312, -367.1263427734375, -18.985984802246094, -396.2530212402344, 238.27877807617188, 108.84925079345703, 809.3547973632812, -11.362865447998047, 95.474853515625, 101.4157943725586, 1053.276611328125, 194.10049438476562, 154.9871826171875, 337.0713195800781, 599.342529296875, 501.4598083496094, -105.35995483398438, 336.8797607421875, 383.5682373046875, 317.1559143066406, 234.10427856445312, 1325.9781494140625, 337.8408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000515.npy"}
|
|
{"epoch": 0.7562408223201175, "step": 516, "batch_size": 64, "mean": 350.2784423828125, "std": 423.8194274902344, "min": -575.0857543945312, "p10": -163.24182586669917, "median": 350.2778625488281, "p90": 908.2671203613285, "max": 1241.86328125, "pos_frac": 0.796875, "sample": [183.30526733398438, 217.25399780273438, 768.3536376953125, -125.96692657470703, 576.3359985351562, 777.8609619140625, 470.2342224121094, -128.68702697753906, 248.3542022705078, 486.33599853515625, 1241.86328125, 461.55584716796875, 515.4243774414062, 512.3876953125, 180.4442596435547, -123.48403930664062, 589.3065795898438, 7.8334808349609375, 126.61846923828125, 1211.37060546875, -575.0857543945312, 103.6463623046875, -178.051025390625, -451.9931640625, 940.789306640625, 489.69329833984375, -510.3858337402344, 832.3820190429688, 1197.0367431640625, 188.5713653564453, 320.2629089355469, 808.9959106445312, 992.2962036132812, 259.573486328125, 570.5291137695312, 669.5684814453125, 277.03167724609375, -14.038028717041016, 478.8871154785156, 366.66650390625, 160.9579315185547, -44.654815673828125, 965.1503295898438, 249.13662719726562, 74.33291625976562, 1043.893310546875, 206.53981018066406, 201.79559326171875, 20.688316345214844, 483.0647277832031, 422.69146728515625, 498.872314453125, 86.79151916503906, 581.0501708984375, 800.9819946289062, -218.72918701171875, -343.34820556640625, -462.0380859375, 516.7219848632812, 616.8665771484375, 532.555419921875, 778.3236083984375, 333.88922119140625, -50.79335403442383], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000516.npy"}
|
|
{"epoch": 0.7577092511013216, "step": 517, "batch_size": 64, "mean": 408.0665283203125, "std": 538.670166015625, "min": -655.7013549804688, "p10": -191.9057479858398, "median": 367.2696228027344, "p90": 925.2973266601563, "max": 2370.55078125, "pos_frac": 0.765625, "sample": [540.2921142578125, 699.818115234375, 98.049072265625, 344.94818115234375, 841.05419921875, 253.37338256835938, -392.62384033203125, 843.8239135742188, 513.1073608398438, -203.3366241455078, 559.9391479492188, 1898.7725830078125, 1353.463134765625, 369.58233642578125, 508.4737548828125, -126.21541595458984, 267.49676513671875, 564.5325927734375, 125.03936004638672, 783.9463500976562, 1475.582763671875, 669.0352783203125, -6.980672836303711, 274.5314636230469, -655.7013549804688, -260.32501220703125, 123.06896209716797, 400.5074157714844, -375.6038513183594, 2370.55078125, 391.92071533203125, -10.062973022460938, -137.29005432128906, 929.7713623046875, 134.96441650390625, 620.638671875, -462.6051025390625, 458.7773742675781, -51.1998291015625, 409.412109375, 805.1146240234375, -104.2844467163086, 364.9569091796875, 303.1481628417969, 629.4696044921875, 547.6724853515625, 717.5935668945312, 390.75823974609375, 34.92945098876953, 1321.0584716796875, 129.77130126953125, 1121.808349609375, 356.328125, 914.85791015625, 180.505615234375, 751.2454833984375, -272.40301513671875, 211.17611694335938, 273.7471618652344, 414.0709228515625, -165.23370361328125, 855.2294311523438, 255.03372192382812, -62.824501037597656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000517.npy"}
|
|
{"epoch": 0.7591776798825257, "step": 518, "batch_size": 64, "mean": 336.003662109375, "std": 432.7972717285156, "min": -842.8505859375, "p10": -239.32052612304685, "median": 276.92066955566406, "p90": 925.6082275390626, "max": 1189.91455078125, "pos_frac": 0.796875, "sample": [128.94223022460938, 1140.7247314453125, 515.60595703125, 349.97589111328125, 246.7366180419922, 260.8826904296875, 498.6963806152344, 673.3876953125, 83.78779602050781, 232.19061279296875, 933.2696533203125, 662.6155395507812, 104.5147705078125, -286.75885009765625, 550.64501953125, 243.35707092285156, 400.895751953125, 172.52740478515625, 59.2762451171875, 492.52105712890625, 996.9091796875, -43.306739807128906, -223.50686645507812, 134.04766845703125, -68.36627197265625, 316.3471984863281, 783.6973266601562, 136.19482421875, 769.8211669921875, 885.5684204101562, 907.7315673828125, 66.09385681152344, 1183.255615234375, 1189.91455078125, 892.8496704101562, 696.580322265625, 118.99419403076172, -110.88031005859375, 499.3396301269531, 604.6056518554688, 293.93695068359375, -101.8785400390625, 477.6662292480469, -246.09780883789062, 466.8794860839844, -376.31915283203125, -348.801025390625, -842.8505859375, 354.6785583496094, 490.2149963378906, 33.40946960449219, 212.18115234375, -264.5906982421875, 189.79248046875, -395.20465087890625, 1020.63720703125, 1013.90380859375, 253.72140502929688, -32.24985122680664, 902.5640258789062, 292.9586486816406, 135.93714904785156, 562.8551025390625, 211.2041778564453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000518.npy"}
|
|
{"epoch": 0.7606461086637298, "step": 519, "batch_size": 64, "mean": 231.53121948242188, "std": 444.94757080078125, "min": -949.0491943359375, "p10": -280.0728302001953, "median": 249.53829193115234, "p90": 861.2126464843753, "max": 1260.77490234375, "pos_frac": 0.65625, "sample": [-82.81826782226562, 535.5498657226562, 337.8779602050781, -434.5287780761719, 1005.5730590820312, -261.21875, 76.91807556152344, 498.97344970703125, 985.9931030273438, 612.9601440429688, 111.63700866699219, 593.1902465820312, -233.75540161132812, 895.0741577148438, 320.0937194824219, -853.0052490234375, 1187.117919921875, 574.39794921875, 79.99181365966797, -75.90003967285156, 295.1595458984375, -32.97663879394531, 50.45935821533203, 320.33209228515625, 517.8058471679688, 134.61465454101562, 288.6554870605469, 447.00238037109375, 493.89556884765625, -18.43760108947754, -33.18255615234375, 671.8211669921875, -144.41983032226562, -93.591796875, 333.38604736328125, 42.244850158691406, 554.05908203125, -40.600257873535156, -280.8697509765625, -77.92817687988281, 339.462158203125, -278.2133483886719, 980.1351928710938, 449.929931640625, -949.0491943359375, 577.3357543945312, 1260.77490234375, 39.842811584472656, -49.22564697265625, -3.0577545166015625, 152.85964965820312, 895.0060424804688, -382.3453674316406, 636.2236328125, -328.5924072265625, 249.8466796875, 782.3613891601562, 18.670772552490234, 353.7666931152344, 513.3770141601562, 249.2299041748047, -43.67594528198242, 364.8865661621094, -313.10235595703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000519.npy"}
|
|
{"epoch": 0.762114537444934, "step": 520, "batch_size": 64, "mean": 316.0799560546875, "std": 492.4458923339844, "min": -1066.5230712890625, "p10": -177.17986755371092, "median": 241.601318359375, "p90": 966.5399353027345, "max": 2128.75244140625, "pos_frac": 0.75, "sample": [-156.70608520507812, -290.8548583984375, 995.80029296875, 114.69401550292969, 462.9303283691406, 151.187744140625, 421.4401550292969, -59.70008850097656, -185.954345703125, 172.1512451171875, 11.380630493164062, -55.222137451171875, 633.9439697265625, -47.37922668457031, -37.356483459472656, 553.2345581054688, 1298.0233154296875, 2128.75244140625, -192.00094604492188, 338.5931396484375, 1100.457763671875, 1380.3359375, -321.5484924316406, 192.47012329101562, -36.820465087890625, 746.9788818359375, 124.41254425048828, 571.746826171875, 474.8458251953125, 321.9464111328125, 250.0484619140625, 251.0968780517578, 1251.434814453125, 156.09779357910156, 75.28872680664062, 471.3455810546875, 535.3140869140625, 447.4075622558594, 764.3948364257812, -1066.5230712890625, 529.1773071289062, 137.94944763183594, 193.99513244628906, 317.97637939453125, 41.79425048828125, 981.8162231445312, 40.64727020263672, 893.4216918945312, 466.56549072265625, 233.1541748046875, 181.3281707763672, -289.5069580078125, -231.86825561523438, 329.73834228515625, 322.4642028808594, 569.9920654296875, -151.5869140625, 930.895263671875, -154.22630310058594, 571.1174926757812, 373.52960205078125, -131.53111267089844, 1.230804443359375, 123.35488891601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000520.npy"}
|
|
{"epoch": 0.7635829662261381, "step": 521, "batch_size": 64, "mean": 340.51385498046875, "std": 454.4649658203125, "min": -766.3245849609375, "p10": -105.91214599609374, "median": 226.33580017089844, "p90": 966.1593505859375, "max": 1674.8177490234375, "pos_frac": 0.796875, "sample": [446.3504638671875, 753.2576904296875, 189.47906494140625, 27.033035278320312, -82.18399810791016, -766.3245849609375, -198.749267578125, -19.613845825195312, -99.920166015625, 586.44140625, 42.6514892578125, -239.367431640625, 11.877361297607422, 638.0852661132812, 475.8005065917969, 1102.9920654296875, 76.98443603515625, 8.62442398071289, -235.3376007080078, 947.515869140625, 213.51231384277344, 4.7895050048828125, -102.73186492919922, 239.15928649902344, 539.156982421875, 678.0612182617188, 605.129150390625, 151.20986938476562, 95.1670913696289, 31.39263916015625, 406.2271728515625, 593.2195434570312, -25.026954650878906, 1054.3201904296875, 166.57275390625, 599.4481201171875, 524.0728759765625, 195.46249389648438, 573.733642578125, -107.2751235961914, 242.70230102539062, 401.6090087890625, 349.7220458984375, 86.37528991699219, 194.8955078125, 1072.7440185546875, -118.48616027832031, 1130.959716796875, -6.379180908203125, -513.14453125, 570.1239013671875, 415.061767578125, 703.1187744140625, 6.5393524169921875, 711.4385986328125, 974.1494140625, 105.27433776855469, 1464.775146484375, 33.860313415527344, 1674.8177490234375, 481.06097412109375, 99.60893249511719, 846.2070922851562, 764.6553955078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000521.npy"}
|
|
{"epoch": 0.7650513950073421, "step": 522, "batch_size": 64, "mean": 304.40625, "std": 487.6728820800781, "min": -795.6722412109375, "p10": -160.09324493408204, "median": 244.43445587158203, "p90": 1037.7237976074218, "max": 1326.188232421875, "pos_frac": 0.796875, "sample": [235.29713439941406, 699.5376586914062, 898.3917846679688, -28.69506072998047, 1043.992431640625, 117.7158203125, 1326.188232421875, 190.12478637695312, 55.38431167602539, 13.649883270263672, -152.87570190429688, 423.8899841308594, 763.9095458984375, 41.51158142089844, 505.3223876953125, -77.02487182617188, 302.8480529785156, -95.14280700683594, 448.8817138671875, 257.2490234375, 649.2283935546875, 627.5333862304688, 99.37616729736328, 315.1240234375, -602.192626953125, 1171.273681640625, 300.27520751953125, 149.8270263671875, 493.11712646484375, 557.0027465820312, 302.22015380859375, 118.11363220214844, 110.86360168457031, 44.22821044921875, 290.81585693359375, -795.6722412109375, 534.774658203125, 130.65011596679688, 179.1114501953125, -363.681640625, -163.1864776611328, 444.64483642578125, 974.391357421875, 88.62772369384766, -459.027587890625, 539.580078125, 920.0725708007812, 305.3541259765625, -769.3681030273438, 1179.284912109375, 1141.6781005859375, -39.038665771484375, 1266.1221923828125, 50.42371368408203, 126.09561157226562, -602.2442626953125, 27.018836975097656, -90.1856918334961, 606.6293334960938, 253.57177734375, 1023.0969848632812, 84.07431030273438, 1204.6842041015625, 87.55592346191406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000522.npy"}
|
|
{"epoch": 0.7665198237885462, "step": 523, "batch_size": 64, "mean": 327.42437744140625, "std": 560.4797973632812, "min": -1002.2962036132812, "p10": -282.9564392089843, "median": 283.86358642578125, "p90": 1098.0642089843752, "max": 1750.0721435546875, "pos_frac": 0.6875, "sample": [-143.76365661621094, 1177.545166015625, 128.9683380126953, 264.25042724609375, 384.66131591796875, -300.07000732421875, 422.459228515625, 472.5729675292969, 955.322509765625, 430.07977294921875, -853.2615356445312, 726.4091186523438, -1.1029205322265625, -115.87405395507812, -396.5157165527344, 23.814619064331055, 254.7423095703125, 751.7984008789062, -489.7036437988281, 209.19094848632812, 609.09033203125, 1027.939453125, -119.0821533203125, 10.89659309387207, 492.774169921875, 115.27619934082031, 303.47674560546875, 124.25137329101562, -94.30785369873047, 1515.3194580078125, 565.9114379882812, 129.05506896972656, -55.8990478515625, -1002.2962036132812, 1128.11767578125, 873.03271484375, -455.2230529785156, -35.28425598144531, 529.6984252929688, -165.55445861816406, 701.4249267578125, -99.20220947265625, 1750.0721435546875, 350.2860412597656, 779.3244018554688, -29.079925537109375, 328.7110290527344, 133.94711303710938, 1627.5777587890625, 487.0421447753906, 582.3929443359375, 152.08865356445312, 572.0960083007812, 168.3157958984375, 752.8136596679688, 1201.1544189453125, 819.816162109375, -83.54756164550781, 613.4334716796875, 1172.716796875, 590.440673828125, -714.755859375, -243.0247802734375, -57.601070404052734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000523.npy"}
|
|
{"epoch": 0.7679882525697503, "step": 524, "batch_size": 64, "mean": 341.17156982421875, "std": 521.1823120117188, "min": -1562.6910400390625, "p10": -87.22211303710937, "median": 407.4591979980469, "p90": 918.4738830566407, "max": 1695.056884765625, "pos_frac": 0.75, "sample": [19.135543823242188, 868.8897705078125, 435.08355712890625, 537.5540161132812, 434.6937561035156, 257.761474609375, 368.51300048828125, 29.46778106689453, 577.0908813476562, 324.12841796875, 437.43560791015625, 97.13357543945312, 414.4714660644531, 562.8721923828125, 691.9537353515625, 408.9719543457031, -12.7484130859375, 908.6312866210938, 277.0653076171875, 615.5741577148438, -1562.6910400390625, 448.0322265625, 589.7952270507812, 1695.056884765625, -53.43559265136719, -22.65679931640625, 922.692138671875, 391.9123840332031, 532.2096557617188, -17.630435943603516, 7.776088714599609, 368.807373046875, 506.67071533203125, -211.6251678466797, 710.5401611328125, 282.4762878417969, 1206.930908203125, 1491.3671875, 724.1771850585938, 492.57513427734375, -35.24816131591797, -882.352783203125, 12.07684326171875, -59.01409149169922, 660.4572143554688, -88.84475708007812, 368.008544921875, 7.640830993652344, 461.19952392578125, 483.9961242675781, 1101.855224609375, 981.1891479492188, 615.5652465820312, -1009.2618408203125, -74.83052825927734, -103.28953552246094, 990.770751953125, 642.4471435546875, -59.683433532714844, 405.9464416503906, 587.6219482421875, 393.72216796875, -238.2152099609375, -83.43594360351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000524.npy"}
|
|
{"epoch": 0.7694566813509545, "step": 525, "batch_size": 64, "mean": 438.11553955078125, "std": 538.010986328125, "min": -790.8737182617188, "p10": -93.9471954345703, "median": 342.2528839111328, "p90": 1266.124340820313, "max": 1625.926513671875, "pos_frac": 0.796875, "sample": [380.95635986328125, 1108.949462890625, 436.7550048828125, 142.05496215820312, 36.12987518310547, 408.1159362792969, 1625.926513671875, 832.0304565429688, 385.3682556152344, -270.2761535644531, 1305.5780029296875, -454.04931640625, 856.4324340820312, 194.8290557861328, 241.51980590820312, -146.1193084716797, 154.58787536621094, 285.3971252441406, 992.866455078125, 656.15234375, 531.783935546875, 63.582157135009766, -94.664794921875, 150.52322387695312, -182.32501220703125, 347.919189453125, 511.92047119140625, 556.0537109375, 336.5865783691406, 1174.0657958984375, 1082.8282470703125, 53.668968200683594, 1576.9781494140625, -8.549903869628906, 1613.1685791015625, 350.70977783203125, 169.42779541015625, 251.82260131835938, 405.49957275390625, -92.27279663085938, 1603.796630859375, -790.8737182617188, -36.12762451171875, -8.734365463256836, 319.6644592285156, 308.4302978515625, 500.1363220214844, 127.94747924804688, 597.6290893554688, 184.23931884765625, 734.634521484375, 1458.4755859375, -64.87905883789062, -0.93743896484375, 692.8292846679688, 370.64630126953125, 317.7685241699219, 108.74034881591797, -384.397216796875, 993.6121826171875, 209.23178100585938, 1583.298095703125, 382.3489685058594, 859.983154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000525.npy"}
|
|
{"epoch": 0.7709251101321586, "step": 526, "batch_size": 64, "mean": 420.246337890625, "std": 556.9052124023438, "min": -1063.6470947265625, "p10": -202.66725463867186, "median": 421.4828796386719, "p90": 1177.990075683594, "max": 1472.4678955078125, "pos_frac": 0.734375, "sample": [1127.797119140625, 812.3160400390625, 32.92433547973633, 264.4771728515625, 360.896240234375, 277.7560119628906, -208.3822784423828, -0.18695640563964844, 547.1725463867188, -701.4600830078125, 607.1007690429688, 805.6802978515625, 791.0569458007812, 248.36831665039062, 1241.539306640625, 1017.9654541015625, -122.67239379882812, 749.5148315429688, 294.7823791503906, 611.499755859375, 397.7764892578125, 534.10791015625, 229.71217346191406, 1074.31201171875, 532.9118041992188, 590.3087158203125, 1472.4678955078125, 0.1345367431640625, 374.49298095703125, -189.3321990966797, -222.88719177246094, 657.4629516601562, -499.0543518066406, 1389.98193359375, -177.10546875, 313.1722717285156, 998.2860107421875, 720.13330078125, 206.38333129882812, -1063.6470947265625, 836.5667114257812, 669.1077880859375, 438.13372802734375, 1277.0897216796875, -531.6384887695312, 741.1485595703125, 723.394287109375, 1113.0699462890625, -57.616355895996094, 502.30023193359375, -11.483306884765625, 240.5102081298828, 302.9755554199219, 1214.57421875, 1026.39501953125, -181.24557495117188, 1258.369140625, 605.0421752929688, 404.83203125, -55.18748474121094, -147.04840087890625, -16.466115951538086, -754.32275390625, 1199.5013427734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000526.npy"}
|
|
{"epoch": 0.7723935389133627, "step": 527, "batch_size": 64, "mean": 293.5634765625, "std": 432.2890319824219, "min": -708.191650390625, "p10": -235.4161666870117, "median": 234.38238525390625, "p90": 803.1895385742188, "max": 1217.2864990234375, "pos_frac": 0.765625, "sample": [-565.3636474609375, 492.0705871582031, 331.8761291503906, 390.3924255371094, 685.2420043945312, 158.30654907226562, 1217.2864990234375, -103.00415802001953, -239.77151489257812, 771.05322265625, -156.0542449951172, -197.08676147460938, 669.9265747070312, 250.1126251220703, -708.191650390625, -593.569091796875, 496.91558837890625, -71.40637969970703, 676.2018432617188, 460.54888916015625, -345.55328369140625, 1072.5435791015625, 112.7313232421875, 60.32285690307617, 140.5143280029297, 809.874755859375, -22.530197143554688, 67.21027374267578, 925.7503662109375, 703.0816650390625, 544.1182861328125, 1061.3233642578125, 435.9293212890625, 665.7728881835938, 1159.6739501953125, 1037.7950439453125, 787.5906982421875, -27.201208114624023, 214.6849822998047, 237.25350952148438, 641.4829711914062, 165.46145629882812, 330.77935791015625, 221.45802307128906, -130.514404296875, 63.642791748046875, 325.5828857421875, 341.0450744628906, 160.9522705078125, -433.69036865234375, 277.0982666015625, 184.58551025390625, 706.8345947265625, 218.18994140625, 115.78677368164062, 83.41178894042969, 689.805908203125, 664.315673828125, 530.2672119140625, 218.3604278564453, 231.51126098632812, -226.91934204101562, 41.301307678222656, -239.0576629638672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000527.npy"}
|
|
{"epoch": 0.7738619676945668, "step": 528, "batch_size": 64, "mean": 356.39044189453125, "std": 578.6063842773438, "min": -753.078125, "p10": -395.55372924804686, "median": 284.60243225097656, "p90": 1176.3712158203125, "max": 1946.773193359375, "pos_frac": 0.734375, "sample": [7.749229431152344, 1218.000244140625, 306.3672790527344, 291.8287658691406, 137.81866455078125, -396.86236572265625, -112.84253692626953, 705.1298828125, 578.3967895507812, 1225.626220703125, 411.85308837890625, -99.55625915527344, 412.5897216796875, -425.20721435546875, 116.7765884399414, 670.9050903320312, 358.189208984375, -126.44766235351562, 1165.6968994140625, 1946.773193359375, -17.666847229003906, 1027.9224853515625, 494.99969482421875, 722.5581665039062, 575.0034790039062, 916.2998657226562, -542.105712890625, 1317.4140625, -236.253173828125, 1096.738037109375, -551.2777099609375, 277.3760986328125, 385.8652648925781, 213.09860229492188, 318.452880859375, 180.26695251464844, 547.02001953125, 146.86915588378906, 505.9900817871094, 18.355445861816406, -225.82041931152344, 854.7549438476562, -157.15866088867188, -753.078125, 1481.74365234375, 214.69284057617188, 477.1646728515625, 1638.267578125, 161.6179962158203, 40.01068115234375, -577.6348266601562, -552.5634765625, 674.78173828125, 1180.9459228515625, 145.91940307617188, 832.5149536132812, 616.12109375, 208.19747924804688, -392.500244140625, -15.477432250976562, 116.13400268554688, -119.14387512207031, 1067.882080078125, 101.90474700927734], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000528.npy"}
|
|
{"epoch": 0.775330396475771, "step": 529, "batch_size": 64, "mean": 237.14752197265625, "std": 585.0148315429688, "min": -1584.3756103515625, "p10": -478.7118286132812, "median": 238.74124145507812, "p90": 901.0221923828128, "max": 1566.9703369140625, "pos_frac": 0.671875, "sample": [97.94862365722656, -221.6194305419922, 1566.9703369140625, -24.69134521484375, 173.63299560546875, 589.9404296875, 457.7382507324219, 767.6803588867188, 553.5167236328125, 1447.119873046875, -280.5191955566406, 509.19580078125, 120.69905853271484, -481.97088623046875, 585.8867797851562, -159.8647003173828, 140.64434814453125, 75.6709213256836, 802.8151245117188, -69.9792251586914, -371.6921081542969, 287.85418701171875, 1166.89501953125, 761.139892578125, 992.3920288085938, -721.9035034179688, -37.38910675048828, 262.6841735839844, -593.304443359375, 664.3567504882812, 274.3262634277344, 542.7189331054688, -136.98178100585938, 355.17156982421875, -52.144256591796875, 36.75126647949219, 747.940673828125, 74.50486755371094, 610.40478515625, 760.0241088867188, -1584.3756103515625, 203.83892822265625, 1012.1732177734375, -540.0037231445312, 616.1257934570312, -1037.0994873046875, 111.77181243896484, 453.41925048828125, 932.8942260742188, 37.131507873535156, -786.9905395507812, 426.061767578125, -57.138832092285156, 680.014404296875, -471.10736083984375, 487.4315490722656, 214.79830932617188, 826.6541137695312, 604.968017578125, 1268.65185546875, -335.0563049316406, -197.9375457763672, -313.4888610839844, 350.1405334472656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000529.npy"}
|
|
{"epoch": 0.7767988252569751, "step": 530, "batch_size": 64, "mean": 394.1950988769531, "std": 707.1098022460938, "min": -2146.034423828125, "p10": -285.1765563964844, "median": 265.4164276123047, "p90": 1461.4590698242193, "max": 2280.613037109375, "pos_frac": 0.734375, "sample": [323.7450256347656, -380.6220397949219, -12.858955383300781, 176.07318115234375, 319.1336975097656, -270.2833557128906, 1750.9473876953125, 570.1223754882812, 353.9961242675781, -855.5955810546875, 119.82019805908203, -80.07390594482422, -16.36492919921875, -140.51205444335938, 738.6797485351562, 777.2002563476562, -51.65861511230469, 4.23516845703125, -405.06195068359375, 1310.887451171875, 1663.729248046875, 602.7403564453125, 965.6138305664062, 758.7120971679688, -111.78276062011719, 831.1864013671875, -110.343017578125, 177.35610961914062, -304.6800537109375, 605.9511108398438, -5.167112350463867, 1639.7376708984375, 1170.5137939453125, 523.7686157226562, 338.9269714355469, 47.51325607299805, 851.8043212890625, 69.29190826416016, -291.5593566894531, -426.9673156738281, 155.59861755371094, 1846.8668212890625, 710.91455078125, 2280.613037109375, 77.36376953125, 329.4670104980469, 402.3329772949219, 256.6404113769531, 1518.44775390625, 212.49325561523438, 369.3437194824219, -0.6743106842041016, 860.7401733398438, -2146.034423828125, 1328.4854736328125, 151.60523986816406, 11.592193603515625, 1702.39599609375, 549.8233032226562, 274.19244384765625, 81.7646484375, 166.95257568359375, 637.2872924804688, 222.11862182617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000530.npy"}
|
|
{"epoch": 0.7782672540381792, "step": 531, "batch_size": 64, "mean": 348.384521484375, "std": 561.2249755859375, "min": -1051.8900146484375, "p10": -240.5874786376953, "median": 238.29672241210938, "p90": 1072.9100524902349, "max": 1888.339111328125, "pos_frac": 0.78125, "sample": [597.6168212890625, 104.7226791381836, 595.6563720703125, 1114.1201171875, 929.9428100585938, -539.2804565429688, 224.60104370117188, 226.65676879882812, 1747.614501953125, -76.35317993164062, -266.4173583984375, 469.2530212402344, 10.360456466674805, 419.0141906738281, 283.6479187011719, 195.97364807128906, 440.3729553222656, 227.1607208251953, -1051.8900146484375, 371.31036376953125, 976.7532348632812, 742.0460815429688, 377.77508544921875, 81.30351257324219, 790.7698364257812, 192.041259765625, 473.1450500488281, 229.0251007080078, 491.2762756347656, -55.636680603027344, 1232.474365234375, -134.66888427734375, 667.338623046875, 691.507568359375, 105.88853454589844, 239.10556030273438, -29.27544403076172, -619.6021728515625, -16.328716278076172, 172.1898193359375, 1888.339111328125, 364.4688720703125, 129.35458374023438, 27.857620239257812, 1789.8836669921875, -492.2596130371094, 569.8753662109375, 1449.7823486328125, 267.5521240234375, 936.3564453125, 1181.778564453125, -0.07846832275390625, 601.4099731445312, -241.73861694335938, 237.48788452148438, 130.6779327392578, 503.75048828125, 141.9752960205078, 408.8085021972656, 32.00996398925781, 192.04403686523438, 525.2164306640625, -741.2544555664062, -237.9014892578125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000531.npy"}
|
|
{"epoch": 0.7797356828193832, "step": 532, "batch_size": 64, "mean": 353.33392333984375, "std": 659.3086547851562, "min": -1337.625, "p10": -447.53729248046875, "median": 294.3863067626953, "p90": 1231.153649902344, "max": 1927.8829345703125, "pos_frac": 0.734375, "sample": [1175.70556640625, 478.93511962890625, 1581.2039794921875, -260.1360778808594, 1277.7781982421875, 231.95297241210938, 817.4222412109375, 817.1458740234375, -1337.625, 285.23809814453125, -358.2615966796875, 1475.2607421875, 491.5278015136719, 765.7198486328125, -449.0484619140625, 755.606201171875, 570.091064453125, -1010.229736328125, 71.12669372558594, 332.0802917480469, 421.8810119628906, 1599.849609375, 1927.8829345703125, 850.7451782226562, 612.656005859375, 604.4139404296875, -125.21424865722656, 1084.010498046875, 1254.9171142578125, 188.4952392578125, -122.34406280517578, 313.2911682128906, -157.72579956054688, 953.99072265625, 890.3470458984375, 126.20046997070312, 201.85324096679688, 1060.3463134765625, -74.36197662353516, 292.1464538574219, 55.92009735107422, 836.08935546875, 198.3609619140625, 585.0777587890625, 154.31959533691406, -88.87528228759766, 29.90363311767578, 746.1126098632812, 188.68081665039062, 11.477714538574219, -528.0750732421875, -732.0263061523438, 269.75390625, 1627.185791015625, 296.62615966796875, -101.8619384765625, 359.3641662597656, -933.00390625, -19.36920166015625, 215.62857055664062, -444.01123046875, 616.870361328125, -679.0523071289062, 333.3998718261719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000532.npy"}
|
|
{"epoch": 0.7812041116005873, "step": 533, "batch_size": 64, "mean": 262.97039794921875, "std": 572.2893676757812, "min": -1517.08251953125, "p10": -325.80812377929686, "median": 221.8073272705078, "p90": 933.852752685547, "max": 2138.424072265625, "pos_frac": 0.65625, "sample": [613.7100830078125, -384.10711669921875, 785.398681640625, -525.4921264648438, 447.44525146484375, 203.61984252929688, 1182.235595703125, 590.59375, 256.7889404296875, 411.04534912109375, 775.058837890625, 638.0210571289062, 620.946533203125, 660.0101928710938, -165.18173217773438, 423.0999450683594, 945.3965454101562, 172.14920043945312, 344.7267150878906, -843.6170654296875, 1279.1087646484375, 262.7763671875, -181.72222900390625, 555.899169921875, 42.01747512817383, 619.310791015625, -51.309913635253906, 145.70176696777344, 226.21881103515625, 2138.424072265625, 445.30780029296875, 396.9035949707031, -89.3179931640625, -37.882652282714844, -22.13656997680664, -1517.08251953125, -347.4946594238281, 417.9190979003906, -255.22677612304688, 440.8414611816406, 157.5937042236328, 147.70469665527344, 1293.67919921875, 1130.739990234375, -193.11593627929688, -8.068252563476562, -305.17901611328125, -4.7342529296875, 326.6734924316406, 217.39584350585938, 334.8729553222656, -151.00390625, 906.917236328125, 132.771240234375, -334.649169921875, -291.38336181640625, -835.4617919921875, 199.49267578125, 207.16683959960938, 1189.421875, -226.15997314453125, -38.789337158203125, 808.5484008789062, 545.5675659179688], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000533.npy"}
|
|
{"epoch": 0.7826725403817915, "step": 534, "batch_size": 64, "mean": 365.05377197265625, "std": 560.8082275390625, "min": -896.5723266601562, "p10": -248.14581604003905, "median": 257.7046127319336, "p90": 1023.8048217773438, "max": 1986.0703125, "pos_frac": 0.78125, "sample": [459.1802978515625, 1018.93310546875, -40.167205810546875, 1986.0703125, 42.357364654541016, 895.0958862304688, -493.40045166015625, 243.02316284179688, 282.487060546875, -524.9461059570312, 63.153053283691406, 1227.774658203125, -246.76895141601562, 178.17727661132812, 1261.51611328125, 883.1704711914062, 915.5156860351562, -350.3030700683594, 396.6980895996094, 833.769775390625, 319.04156494140625, -237.91867065429688, 637.97607421875, 454.75897216796875, 244.82273864746094, 389.2220458984375, 1025.8927001953125, -66.64608764648438, 456.01129150390625, 175.30050659179688, 162.885498046875, 882.5755615234375, 270.58648681640625, 24.546958923339844, 700.4519653320312, -527.08203125, -139.00167846679688, 1758.4130859375, 821.9480590820312, 1037.66552734375, 157.560546875, 188.81748962402344, -59.751708984375, 151.04159545898438, 781.0403442382812, 200.83409118652344, 113.37855529785156, 851.2890625, 1501.58740234375, 934.2128295898438, 709.0797729492188, 374.9386291503906, 643.26416015625, 44.68986892700195, 392.7078552246094, 179.3316650390625, 312.8151550292969, 228.66268920898438, 239.34945678710938, 19.337343215942383, -199.58377075195312, -896.5723266601562, -248.73590087890625, -678.6397094726562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000534.npy"}
|
|
{"epoch": 0.7841409691629956, "step": 535, "batch_size": 64, "mean": 419.6590576171875, "std": 565.7296752929688, "min": -845.811279296875, "p10": -203.88623352050777, "median": 409.71868896484375, "p90": 1062.5039306640626, "max": 2399.271728515625, "pos_frac": 0.796875, "sample": [-343.8468017578125, 2399.271728515625, -329.53515625, 542.627685546875, -173.94422912597656, 236.12835693359375, 114.53466796875, 984.311279296875, 133.39251708984375, 627.3319091796875, 246.67100524902344, -71.34749603271484, 260.3093566894531, 912.465087890625, 465.3136901855469, 403.83526611328125, 128.51669311523438, 1308.688720703125, 291.4200134277344, 435.05548095703125, 1022.28955078125, 551.8264770507812, 436.8790283203125, 487.1650695800781, 19.704605102539062, 1555.235595703125, 1176.1851806640625, 459.98223876953125, -57.570281982421875, 325.04742431640625, 1492.1575927734375, 770.0802001953125, 1016.9637451171875, -355.1266174316406, -329.0292663574219, -845.811279296875, 485.3384094238281, 812.9666137695312, 77.95953369140625, 415.60211181640625, 366.46478271484375, 1079.1400146484375, 852.536376953125, 211.11590576171875, 571.9827270507812, -104.90495300292969, 50.43657684326172, 747.226318359375, -216.71852111816406, -559.7291259765625, 607.9962158203125, 64.7069320678711, 531.1304931640625, 63.263877868652344, -137.94065856933594, -96.7967529296875, 1023.6864013671875, 185.89840698242188, 1705.3486328125, 269.23779296875, 29.732023239135742, 513.3240966796875, 432.36749267578125, 579.629638671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000535.npy"}
|
|
{"epoch": 0.7856093979441997, "step": 536, "batch_size": 64, "mean": 498.6971435546875, "std": 707.3242797851562, "min": -1274.9525146484375, "p10": -109.56207046508787, "median": 295.2587585449219, "p90": 1205.6970947265627, "max": 3922.043212890625, "pos_frac": 0.828125, "sample": [201.2278594970703, 784.9376831054688, 312.1182861328125, 1018.712890625, 1299.338134765625, 48.14805603027344, 182.14605712890625, 165.02349853515625, 61.86391830444336, -29.99114990234375, 953.18896484375, 328.5274963378906, 1218.65869140625, 874.6253662109375, -272.8618469238281, 527.6279907226562, 1270.453125, 100.86666870117188, 887.6314697265625, 855.4147338867188, 2220.034423828125, 598.0836181640625, -115.5710678100586, 835.331298828125, -154.06185913085938, 1276.60546875, 28.37279510498047, 608.3909912109375, 747.1201171875, -95.54107666015625, 3922.043212890625, 803.6033935546875, 302.42901611328125, 181.86178588867188, 505.63214111328125, 803.3383178710938, 126.47282409667969, 1148.9749755859375, -422.2985534667969, 63.34817123413086, 138.79212951660156, -1274.9525146484375, 1635.86376953125, 993.4776611328125, 786.3125, -224.38816833496094, 124.6798095703125, 258.379150390625, 288.0885009765625, 123.419189453125, 218.17010498046875, -67.28412628173828, 281.6321105957031, 230.05975341796875, 248.68515014648438, 757.4921264648438, 150.23231506347656, -66.02030944824219, -318.333251953125, 8.999053955078125, 1094.8800048828125, 791.6634521484375, 389.489990234375, 1175.453369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000536.npy"}
|
|
{"epoch": 0.7870778267254038, "step": 537, "batch_size": 64, "mean": 343.6505126953125, "std": 581.0789184570312, "min": -1239.528076171875, "p10": -300.3589782714844, "median": 276.2516174316406, "p90": 1123.7184692382814, "max": 1372.5775146484375, "pos_frac": 0.796875, "sample": [-1239.528076171875, -786.736083984375, 16.260162353515625, 216.30352783203125, -543.3525390625, 288.763916015625, 27.32612419128418, 963.4622192382812, 12.97906494140625, -42.892459869384766, 484.8427429199219, -9.311721801757812, 520.004150390625, 447.86846923828125, -300.6064453125, 914.0291748046875, 1194.1917724609375, 39.313201904296875, 125.03894805908203, -14.003837585449219, 36.46197509765625, 972.7952270507812, 56.99091339111328, -787.650390625, 1092.6234130859375, -299.78155517578125, 1372.5775146484375, 1211.418212890625, 368.496826171875, 569.1663818359375, 1071.4530029296875, 404.90411376953125, 1025.978271484375, 307.08013916015625, 827.723876953125, 17.706947326660156, 108.02201843261719, 582.6873168945312, 263.73931884765625, -510.111572265625, 251.76547241210938, 491.8216857910156, -88.08818054199219, -899.907470703125, 27.705638885498047, 1137.044921875, -53.70651626586914, 192.14759826660156, 328.7046203613281, 824.0411987304688, 170.28759765625, 1369.56201171875, 117.19298553466797, 1356.8101806640625, 560.0243530273438, 54.801292419433594, 1270.6387939453125, 105.60324096679688, 1019.8931884765625, 392.2275390625, 539.5441284179688, 705.4296264648438, 36.97991943359375, 1076.8740234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000537.npy"}
|
|
{"epoch": 0.788546255506608, "step": 538, "batch_size": 64, "mean": 376.39959716796875, "std": 511.9606628417969, "min": -552.5037231445312, "p10": -215.6068115234375, "median": 355.06573486328125, "p90": 1008.263348388672, "max": 1978.1910400390625, "pos_frac": 0.75, "sample": [160.3732452392578, 504.5670166015625, 742.86669921875, 1129.33203125, 519.4118041992188, -219.21160888671875, 914.7882080078125, 597.4862060546875, 542.2337036132812, 467.1681823730469, -107.39781951904297, 1680.858154296875, 575.2884521484375, 378.4991455078125, 222.3273162841797, 979.0225830078125, -492.54150390625, 3.6018600463867188, 331.63232421875, 1105.1634521484375, 255.3307647705078, 496.8849182128906, 118.88198852539062, 660.1725463867188, 605.2984619140625, 79.94087219238281, -120.1480712890625, 1428.219970703125, -26.44891357421875, -384.9170227050781, -552.5037231445312, 801.7095336914062, 176.17323303222656, -105.13041687011719, 1978.1910400390625, 691.998779296875, 841.4491577148438, 584.1045532226562, 315.8437805175781, 661.828857421875, 176.8896484375, -145.51910400390625, 11.205402374267578, 619.1254272460938, 216.52609252929688, 668.4100341796875, -474.0527038574219, 513.016357421875, -314.2940368652344, 107.39154052734375, 609.9156494140625, 466.08880615234375, 323.23486328125, 697.7890014648438, -202.0356903076172, -52.174461364746094, 227.369140625, 58.37604904174805, 1092.6748046875, 1020.7951049804688, -316.66766357421875, -207.19561767578125, 558.9285888671875, -108.57471466064453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000538.npy"}
|
|
{"epoch": 0.7900146842878121, "step": 539, "batch_size": 64, "mean": 360.6948547363281, "std": 478.0931701660156, "min": -748.0493774414062, "p10": -146.54980773925777, "median": 277.2389831542969, "p90": 932.4374389648442, "max": 1704.988037109375, "pos_frac": 0.765625, "sample": [267.63653564453125, 286.8414306640625, 92.72823333740234, -13.927858352661133, 395.9190673828125, 415.8023681640625, -212.4055938720703, 57.24170684814453, -28.017852783203125, 1445.6083984375, -248.92193603515625, -25.797067642211914, 1704.988037109375, 152.46180725097656, 984.4384765625, 637.3468017578125, 732.0162963867188, 721.9188842773438, 80.412841796875, 777.13525390625, 1561.462158203125, 103.931640625, 196.2885284423828, 732.3870849609375, 183.10238647460938, 198.16177368164062, -15.280288696289062, 459.122802734375, -116.99842834472656, 715.5145263671875, -399.1960144042969, 679.6966552734375, -748.0493774414062, 515.109130859375, 639.1776123046875, 101.1256103515625, 63.615943908691406, 547.9242553710938, 219.3844451904297, -117.46347045898438, -68.15614318847656, 1399.8416748046875, -208.73226928710938, 672.668701171875, -41.81145095825195, -159.015380859375, 999.4547729492188, -254.52183532714844, 60.322166442871094, 13.324867248535156, 608.8201904296875, 248.81008911132812, 775.8297119140625, 346.4149475097656, 519.8871459960938, 301.03631591796875, 811.1016845703125, 551.6574096679688, 424.50689697265625, 407.147216796875, 1.2948436737060547, 1234.3780517578125, 546.5258178710938, 151.2425994873047], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000539.npy"}
|
|
{"epoch": 0.7914831130690162, "step": 540, "batch_size": 64, "mean": 470.0218505859375, "std": 662.1915893554688, "min": -859.353515625, "p10": -357.0869323730467, "median": 335.1390075683594, "p90": 1401.9480346679688, "max": 2555.187744140625, "pos_frac": 0.765625, "sample": [517.7066650390625, 586.4639282226562, 770.8077392578125, -448.96160888671875, 1971.2681884765625, -190.52764892578125, 335.5509338378906, 151.50921630859375, -859.353515625, -506.2259521484375, -72.96976470947266, 546.265869140625, -159.20924377441406, 264.23748779296875, 2555.187744140625, -61.25220489501953, 255.1068115234375, 100.39114379882812, 595.7591552734375, 686.015380859375, -88.49502563476562, 135.82281494140625, 419.1116027832031, 824.5960693359375, 798.32666015625, 1370.4002685546875, 631.1587524414062, 1415.468505859375, 1562.44287109375, 334.7270812988281, 562.1978149414062, 435.303466796875, 713.6873779296875, 66.8439712524414, 302.21087646484375, 484.4067687988281, -66.82962036132812, -16.6636962890625, 1601.3443603515625, 271.6595764160156, 1103.8536376953125, 1835.956787109375, 1586.2120361328125, 1310.4708251953125, -520.2887573242188, 689.1151123046875, 227.82510375976562, 264.22137451171875, 755.2992553710938, 315.34442138671875, -428.469482421875, 306.4263000488281, 704.2918701171875, 860.40185546875, 190.16864013671875, 300.9526672363281, 770.1724853515625, 181.97415161132812, 1348.41748046875, -37.373573303222656, 8.089942932128906, 659.7147216796875, -696.7598266601562, -450.1092224121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000540.npy"}
|
|
{"epoch": 0.7929515418502202, "step": 541, "batch_size": 64, "mean": 243.10484313964844, "std": 569.4824829101562, "min": -1172.068603515625, "p10": -474.44540405273426, "median": 207.50424194335938, "p90": 891.2317443847658, "max": 1939.8819580078125, "pos_frac": 0.75, "sample": [-831.691162109375, 190.78756713867188, 641.302978515625, 112.75155639648438, 161.8907470703125, 95.32662963867188, 291.2289733886719, 78.49775695800781, 237.62451171875, -53.4432373046875, -220.36642456054688, 1322.46728515625, -777.7163696289062, 34.56190490722656, 935.3826904296875, 76.87271881103516, 223.412353515625, 1207.531982421875, 111.2328109741211, 786.205322265625, 1522.1676025390625, 850.227783203125, 13.882476806640625, -620.66748046875, 183.24493408203125, -251.4791259765625, 406.79412841796875, 311.4871826171875, -515.334228515625, 684.668212890625, 646.5108642578125, 62.49219512939453, 751.5848388671875, 180.3641815185547, 213.63821411132812, 158.7178955078125, 192.687744140625, 214.90882873535156, 654.4566650390625, 459.87860107421875, -128.84800720214844, 171.30335998535156, 787.238037109375, 908.8048706054688, 651.1026611328125, 277.7889404296875, -1172.068603515625, -555.8770751953125, 451.5494079589844, 594.7042236328125, -333.9478454589844, 380.3176574707031, 201.37026977539062, 1939.8819580078125, 1169.124267578125, -379.03814697265625, -894.1602172851562, 483.5469055175781, -332.8901062011719, -215.41006469726562, -206.55572509765625, 219.07769775390625, 544.9915161132812, 252.61183166503906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000541.npy"}
|
|
{"epoch": 0.7944199706314243, "step": 542, "batch_size": 64, "mean": 463.46759033203125, "std": 635.0188598632812, "min": -1116.8154296875, "p10": -269.2774948120117, "median": 417.2616882324219, "p90": 1201.4050659179688, "max": 2205.59130859375, "pos_frac": 0.796875, "sample": [982.21484375, 363.800537109375, 229.41278076171875, 292.3572692871094, 198.02972412109375, 988.2560424804688, -364.14459228515625, 2205.59130859375, 1210.9788818359375, 474.92962646484375, 14.499618530273438, 539.19873046875, 863.9873657226562, -16.877655029296875, -87.7949447631836, 463.2091064453125, 1944.5213623046875, 319.67486572265625, -143.28314208984375, 1667.851318359375, 331.3481750488281, 129.13636779785156, 441.50665283203125, 483.29620361328125, -381.5693664550781, 208.0402069091797, -89.0460205078125, -411.049072265625, -241.14540100097656, 991.1400146484375, -281.3341064453125, 853.1807861328125, 864.9834594726562, 177.29049682617188, 1708.2392578125, -98.57461547851562, 685.43310546875, 519.7274169921875, -577.386962890625, 687.0142822265625, 349.44354248046875, 486.28411865234375, 675.775634765625, 123.5707015991211, 1699.424560546875, 175.79428100585938, 278.6153869628906, -1116.8154296875, 740.8146362304688, 175.72885131835938, 745.5585327148438, 333.21295166015625, 106.16755676269531, 441.5783996582031, 1634.6883544921875, 1031.515869140625, 393.0167236328125, -934.7445678710938, 1179.066162109375, 327.3723449707031, 467.7210998535156, 660.568359375, 678.39013671875, 862.5338134765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000542.npy"}
|
|
{"epoch": 0.7958883994126285, "step": 543, "batch_size": 64, "mean": 326.91180419921875, "std": 754.9144897460938, "min": -987.5075073242188, "p10": -375.7522064208984, "median": 147.3399887084961, "p90": 1320.7195434570317, "max": 3345.689453125, "pos_frac": 0.65625, "sample": [21.29511260986328, -6.2955322265625, 805.4842529296875, -25.35723876953125, 7.807638168334961, 2277.112548828125, 4.307374954223633, -43.18782043457031, -987.5075073242188, 712.7413330078125, -70.77677154541016, -803.2081298828125, 74.25897216796875, 194.3218994140625, 2243.584228515625, 57.04164123535156, 38.89508056640625, 3345.689453125, -590.438232421875, 24.94062042236328, 503.3035888671875, -400.33111572265625, -311.19586181640625, 198.10948181152344, -442.8782043457031, -91.5119400024414, -106.360595703125, 561.95849609375, -441.79754638671875, 219.7646026611328, 589.5338134765625, 221.49562072753906, -315.89984130859375, 1361.462890625, -148.90914916992188, -174.2670440673828, 1938.4560546875, 616.1412353515625, 1225.6517333984375, 90.16613006591797, 1568.388427734375, 134.8728790283203, -228.90122985839844, 327.08123779296875, 816.9239501953125, -32.29810333251953, 375.66363525390625, 1782.5184326171875, -383.90716552734375, 624.1568603515625, 227.53005981445312, -199.40350341796875, 336.58026123046875, -356.7239685058594, 490.72479248046875, 603.579345703125, 301.8289489746094, -123.22501373291016, 40.32548522949219, 402.1224060058594, 159.80709838867188, 540.8240966796875, 629.0311889648438, 511.2532958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000543.npy"}
|
|
{"epoch": 0.7973568281938326, "step": 544, "batch_size": 64, "mean": 387.87322998046875, "std": 581.82373046875, "min": -982.05126953125, "p10": -181.7893585205078, "median": 369.4586639404297, "p90": 1155.7386718750001, "max": 1894.2705078125, "pos_frac": 0.765625, "sample": [1171.8990478515625, 824.0594482421875, 356.71728515625, 490.4412536621094, 1623.3502197265625, -982.05126953125, 382.2000427246094, -98.45992279052734, 777.7197265625, -47.148040771484375, 1286.902099609375, -100.56867980957031, 751.9364013671875, 239.07093811035156, -968.8248291015625, 356.3995666503906, 682.7686157226562, 227.877685546875, 56.09661865234375, 396.5888977050781, 414.46380615234375, 389.03717041015625, -240.85812377929688, 277.72528076171875, 71.82852172851562, 155.1586456298828, 776.5267333984375, 76.12567138671875, -316.192626953125, -196.97219848632812, 1634.509765625, 198.87689208984375, -81.85973358154297, 638.1204833984375, -973.201171875, 1118.0311279296875, 477.074951171875, 383.2337646484375, 637.8739013671875, -146.36273193359375, 253.2340087890625, 13.540279388427734, 274.8922424316406, 1609.4326171875, 690.6228637695312, -73.26506042480469, 159.2645721435547, 1894.2705078125, 922.616455078125, 576.2346801757812, 5.493648529052734, 160.26968383789062, 766.1618041992188, 1452.5972900390625, 633.8407592773438, 428.3006896972656, 485.8376770019531, -128.44334411621094, 526.9863891601562, 129.26170349121094, -25.918075561523438, 780.2572021484375, -207.16378784179688, 775.4453735351562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000544.npy"}
|
|
{"epoch": 0.7988252569750367, "step": 545, "batch_size": 64, "mean": 454.80206298828125, "std": 572.7142333984375, "min": -675.2298583984375, "p10": -79.63759384155271, "median": 303.20289611816406, "p90": 1387.784069824219, "max": 1834.6236572265625, "pos_frac": 0.859375, "sample": [-113.52804565429688, 32.063079833984375, -675.2298583984375, 80.09652709960938, 267.7671203613281, 846.8375244140625, 181.2099609375, 104.89640808105469, 544.8445434570312, -351.5038757324219, 498.5699157714844, 711.41650390625, 4.100212097167969, 644.1480102539062, 1246.6837158203125, 146.78594970703125, 1611.574462890625, 1336.9635009765625, 1396.499267578125, 372.7393493652344, 778.976806640625, 71.95205688476562, 262.64599609375, 210.3141632080078, 98.6971664428711, 321.6940002441406, 86.06787109375, 474.34405517578125, 884.4534912109375, -54.403526306152344, 1477.77099609375, 425.92822265625, 107.96212005615234, -90.45219421386719, 1834.6236572265625, 1030.4071044921875, 55.99861145019531, 1415.669189453125, -634.105712890625, 451.8621826171875, -40.013755798339844, 519.8223266601562, 108.97604370117188, 650.62255859375, 373.49462890625, 673.955322265625, 911.521484375, 1367.4486083984375, 284.7117919921875, 196.36151123046875, 154.55165100097656, 81.88386535644531, 284.4233093261719, 405.1966857910156, 146.28457641601562, 1592.3358154296875, 1323.696044921875, -233.21609497070312, 406.4678039550781, 209.0445556640625, 652.3948974609375, -668.762939453125, 147.41195678710938, 1461.378662109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000545.npy"}
|
|
{"epoch": 0.8002936857562408, "step": 546, "batch_size": 64, "mean": 296.3592529296875, "std": 490.6263732910156, "min": -1588.97607421875, "p10": -232.52848358154296, "median": 275.9789581298828, "p90": 881.8617187500001, "max": 1259.843994140625, "pos_frac": 0.734375, "sample": [-176.75595092773438, 109.42194366455078, 410.6649169921875, -11.926788330078125, 178.59381103515625, 492.69952392578125, 271.46588134765625, 578.0330810546875, 183.4984588623047, 221.21142578125, -202.31399536132812, -215.40594482421875, -156.64122009277344, -129.64373779296875, 763.1103515625, 110.45414733886719, 10.356361389160156, 272.1781921386719, 918.58447265625, 279.77972412109375, -102.00255584716797, 862.31298828125, -277.7548522949219, 1223.4613037109375, 664.2929077148438, 850.275634765625, 805.9254760742188, 461.88519287109375, 890.23974609375, 309.8215026855469, 1259.843994140625, -239.86671447753906, -4.480583190917969, 446.3971862792969, 470.2625732421875, -782.6096801757812, 711.0505981445312, 527.110107421875, 765.5849609375, 912.4254760742188, -1588.97607421875, 753.432373046875, 220.19384765625, 280.5586242675781, 479.3079528808594, 776.2587280273438, 53.721923828125, 787.4147338867188, -60.346656799316406, 156.65484619140625, 60.77191925048828, 115.80828094482422, 720.3748779296875, 285.4281921386719, -277.0827941894531, 559.5274047851562, 122.67684936523438, 1078.0635986328125, -289.4101867675781, 120.6552963256836, -56.50489807128906, 390.6654052734375, -353.34613037109375, 969.6035766601562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000546.npy"}
|
|
{"epoch": 0.801762114537445, "step": 547, "batch_size": 64, "mean": 391.1941223144531, "std": 549.2232055664062, "min": -1126.350830078125, "p10": -135.22178726196287, "median": 389.0903625488281, "p90": 1002.0400756835937, "max": 2130.188720703125, "pos_frac": 0.765625, "sample": [542.8841552734375, 61.00525665283203, -1126.350830078125, 1004.90966796875, -43.75886154174805, 22.733673095703125, 326.3190002441406, 692.802978515625, 180.770751953125, 423.7643737792969, 155.60089111328125, 1689.767578125, 38.845035552978516, 1769.9881591796875, -50.88517761230469, -126.55823516845703, 532.93896484375, 710.0376586914062, -165.16734313964844, 995.3443603515625, -54.75603485107422, 77.58856964111328, 460.11163330078125, 260.33953857421875, 611.92822265625, -214.0562744140625, 578.0662231445312, 240.0568084716797, 216.91188049316406, 377.5715026855469, 945.9178466796875, -138.9347381591797, 448.5862731933594, 669.5656127929688, 597.1087036132812, 450.54095458984375, -122.42576599121094, 866.6150512695312, 400.6092224121094, 39.43914794921875, 693.8662719726562, 2130.188720703125, 662.9603881835938, -573.7457275390625, -256.7665710449219, 22.441612243652344, -358.093994140625, 1146.4019775390625, 95.45109558105469, 562.1854248046875, 1512.497802734375, 235.6078338623047, 534.7398681640625, 430.11370849609375, 213.44288635253906, 317.16558837890625, 775.7453002929688, -62.8289794921875, 400.76922607421875, -73.44943237304688, 726.451171875, 1214.918701171875, 420.9427490234375, -80.35883331298828], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000547.npy"}
|
|
{"epoch": 0.8032305433186491, "step": 548, "batch_size": 64, "mean": 356.83355712890625, "std": 596.674560546875, "min": -966.30224609375, "p10": -230.15341491699218, "median": 298.69114685058594, "p90": 872.4000305175782, "max": 2886.806884765625, "pos_frac": 0.75, "sample": [-15.185836791992188, 876.478759765625, 309.90374755859375, 335.5822448730469, -109.27625274658203, 358.4631652832031, -966.30224609375, 1326.7025146484375, 426.88775634765625, 396.9737854003906, 421.2369384765625, 544.6804809570312, 1102.2501220703125, 233.05145263671875, 730.9867553710938, -234.22607421875, -130.29603576660156, 356.5706787109375, 897.7694091796875, 204.38577270507812, 862.8829956054688, 286.8012390136719, 204.2794189453125, -395.83502197265625, 485.3228454589844, 65.94405364990234, 173.94337463378906, 333.41143798828125, 287.4785461425781, 167.7042999267578, -335.8299255371094, 2886.806884765625, 792.196044921875, 229.46484375, 121.22412109375, -478.05963134765625, -72.34742736816406, 684.8429565429688, -220.65054321289062, 2219.790771484375, 41.104793548583984, -61.22793197631836, -283.62628173828125, 473.576416015625, -117.31069946289062, 69.94326782226562, 474.58575439453125, 141.4427490234375, 387.8700256347656, -241.24972534179688, 480.48126220703125, 396.9397277832031, -22.326372146606445, 1935.27197265625, 416.66168212890625, 474.165283203125, 107.52540588378906, -15.696075439453125, 311.70208740234375, 267.8664855957031, 55.717185974121094, 561.47607421875, 819.5535278320312, 796.89111328125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000548.npy"}
|
|
{"epoch": 0.8046989720998532, "step": 549, "batch_size": 64, "mean": 274.5236511230469, "std": 545.8773193359375, "min": -1029.5499267578125, "p10": -273.37737121582023, "median": 166.56822204589844, "p90": 982.442205810547, "max": 1722.9912109375, "pos_frac": 0.671875, "sample": [288.0493469238281, 850.3916625976562, 88.43263244628906, -47.085548400878906, 117.13615417480469, 67.29150390625, -153.6485137939453, 507.50799560546875, 713.4424438476562, 988.0467529296875, -192.4073028564453, 1319.751708984375, 17.125526428222656, 164.91299438476562, -94.22787475585938, -328.2698974609375, 969.3649291992188, -339.45294189453125, -174.79595947265625, -788.941162109375, 624.6365966796875, 1079.8182373046875, -311.88311767578125, 316.27618408203125, 348.7323913574219, 286.0274353027344, 439.27825927734375, -48.69951629638672, 763.3671875, 943.51025390625, 31.82318115234375, 233.41392517089844, 277.8026123046875, -213.10745239257812, 398.1512145996094, -1029.5499267578125, 123.1797866821289, 599.1412353515625, 74.06331634521484, 14.407978057861328, -41.064239501953125, -17.729576110839844, 338.828857421875, 729.0468139648438, 1281.3966064453125, -106.50617980957031, -172.8763885498047, 330.54302978515625, -299.20733642578125, 1662.1107177734375, 1416.4364013671875, -772.2379760742188, -71.51747131347656, 683.61767578125, 432.12554931640625, -129.57168579101562, 153.92822265625, 168.22344970703125, 609.9144287109375, 1722.9912109375, 411.1927490234375, 301.93511962890625, 142.2155303955078, -127.29679107666016], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000549.npy"}
|
|
{"epoch": 0.8061674008810573, "step": 550, "batch_size": 64, "mean": 334.7535400390625, "std": 521.6289672851562, "min": -1098.598876953125, "p10": -275.55433349609365, "median": 326.0226593017578, "p90": 927.3974731445314, "max": 1976.849609375, "pos_frac": 0.8125, "sample": [392.3042907714844, 327.9276428222656, 1111.9212646484375, 385.7474060058594, 301.2383117675781, 798.7841186523438, 1040.18359375, 313.81817626953125, -148.82786560058594, 208.59605407714844, 581.6221313476562, 390.11865234375, 548.0469970703125, 149.49539184570312, 324.11767578125, 1816.4410400390625, 66.15764617919922, -315.5877990722656, 81.33815002441406, 32.915714263916016, -1098.598876953125, -6.4909210205078125, 483.691162109375, 875.9325561523438, -11.451370239257812, 944.5225219726562, 887.4390258789062, 112.95368194580078, 247.08331298828125, 102.06370544433594, 38.80682373046875, -646.4445190429688, 482.75537109375, -127.53438568115234, 387.2864685058594, 803.3765258789062, 68.02980041503906, 520.967041015625, 1976.849609375, 450.9201965332031, 253.0420379638672, 621.9276123046875, 139.24440002441406, 46.175994873046875, 1370.9317626953125, 395.7398681640625, 1066.9095458984375, 140.50794982910156, 196.81967163085938, 618.3230590820312, 436.4845275878906, 638.5109252929688, -644.363525390625, 540.2120971679688, -182.14291381835938, 375.1786193847656, 274.80120849609375, 512.1046142578125, 382.0002746582031, -392.60540771484375, 485.9368896484375, -360.33099365234375, -482.1448974609375, 92.44633483886719], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000550.npy"}
|
|
{"epoch": 0.8076358296622613, "step": 551, "batch_size": 64, "mean": 542.4798583984375, "std": 538.1138916015625, "min": -470.8398742675781, "p10": -60.906350326538075, "median": 482.7066192626953, "p90": 1391.8273681640626, "max": 1931.3369140625, "pos_frac": 0.796875, "sample": [-5.835536956787109, -32.33473205566406, 426.9822692871094, 805.6148071289062, 681.63330078125, 620.0479125976562, -470.8398742675781, -8.444469451904297, 1394.7528076171875, -283.9488525390625, -22.46161651611328, 397.13739013671875, -64.91175842285156, 577.5811157226562, 584.0706176757812, 1612.8284912109375, 479.5343933105469, 342.946533203125, 581.6904907226562, 1781.1085205078125, 967.30126953125, 349.3470458984375, 515.884765625, 627.8111572265625, 311.20147705078125, 298.6707763671875, 983.3573608398438, 392.25299072265625, 657.7570190429688, 346.4442138671875, 369.1396789550781, -51.56039810180664, 1538.12744140625, 166.29034423828125, 481.89263916015625, 645.9073486328125, 773.9271850585938, 687.4267578125, 549.113037109375, 89.68016052246094, -178.08114624023438, 1057.6641845703125, 1385.0013427734375, 483.5205993652344, -33.29680252075195, 1567.206298828125, -295.1681213378906, -299.1285095214844, 1456.3692626953125, 854.3090209960938, -281.9283752441406, 152.45858764648438, 1382.500244140625, 876.17724609375, 477.241455078125, 581.7392578125, 744.5543212890625, 345.16278076171875, 445.79498291015625, 906.7120971679688, 162.14979553222656, 1931.3369140625, 520.44970703125, 378.84051513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000551.npy"}
|
|
{"epoch": 0.8091042584434655, "step": 552, "batch_size": 64, "mean": 605.8823852539062, "std": 716.0411376953125, "min": -757.0923461914062, "p10": -129.67884063720697, "median": 531.0127563476562, "p90": 1423.697314453125, "max": 2670.122314453125, "pos_frac": 0.859375, "sample": [881.2468872070312, 77.87262725830078, 1016.5790405273438, 2626.0712890625, 764.180908203125, 1244.829833984375, 598.3406982421875, 1232.681884765625, 33.76068115234375, 559.301025390625, 672.8270263671875, 554.4649658203125, 168.1190948486328, 514.629150390625, 189.68174743652344, 693.84375, 267.4154968261719, 664.8392333984375, 1349.1031494140625, -409.4828796386719, 44.867156982421875, 704.269287109375, 535.4219970703125, 348.3677062988281, -757.0923461914062, 4.452507019042969, 37.11674499511719, 1561.4583740234375, 251.33970642089844, -293.8394775390625, 1428.8624267578125, -640.3737182617188, 845.485107421875, -56.59803009033203, 511.5560302734375, 147.1293182373047, 495.9451904296875, 2457.918212890625, 1309.0943603515625, 1932.508544921875, 586.5800170898438, -181.46090698242188, 568.7803344726562, 939.4346923828125, 683.107177734375, 2670.122314453125, 251.87371826171875, 349.58892822265625, 1411.6453857421875, 2255.468017578125, 526.603515625, 151.76971435546875, 453.186279296875, -332.6996765136719, -68.22467041015625, 439.0570373535156, -156.01634216308594, 135.7578887939453, 434.91357421875, 565.9283447265625, 641.5528564453125, 1058.111572265625, 772.544189453125, 50.65471649169922], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000552.npy"}
|
|
{"epoch": 0.8105726872246696, "step": 553, "batch_size": 64, "mean": 303.1957702636719, "std": 568.080322265625, "min": -833.753662109375, "p10": -208.92323913574216, "median": 208.37925720214844, "p90": 988.7486877441406, "max": 2567.246826171875, "pos_frac": 0.765625, "sample": [2567.246826171875, 917.865966796875, -200.01235961914062, 494.6927795410156, 486.00506591796875, -125.12308502197266, 54.633697509765625, 9.662773132324219, 146.0420379638672, 262.5179443359375, 127.04949951171875, 251.29307556152344, 107.41413116455078, 101.48251342773438, -60.43018341064453, 262.115966796875, -546.1642456054688, 14.204864501953125, 354.24700927734375, -92.28292846679688, 249.30520629882812, 158.29151916503906, 345.8850402832031, -537.8062133789062, 555.9884643554688, -89.64503479003906, 1221.8543701171875, 81.57489013671875, 7.596015930175781, 281.5733642578125, -149.25904846191406, 454.6160583496094, -227.41522216796875, -149.89955139160156, 563.6068115234375, 321.13043212890625, 369.2000732421875, 160.389892578125, 989.93310546875, -79.2022933959961, 402.7666320800781, -833.753662109375, 569.103759765625, 509.8177185058594, 183.57943725585938, 168.802734375, -761.32861328125, 52.30560302734375, 512.0404052734375, 22.21813201904297, 1190.185546875, 12.416526794433594, 1276.4005126953125, 581.9348754882812, 903.6761474609375, 1135.335693359375, 233.1790771484375, 985.9850463867188, -576.0919799804688, 1784.781494140625, -212.7421875, 863.8297119140625, 90.62274932861328, 649.2848510742188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000553.npy"}
|
|
{"epoch": 0.8120411160058737, "step": 554, "batch_size": 64, "mean": 426.5704345703125, "std": 703.3275756835938, "min": -1008.0779418945312, "p10": -473.4370300292968, "median": 389.1060791015625, "p90": 1416.9748535156255, "max": 2249.103759765625, "pos_frac": 0.765625, "sample": [187.1820526123047, -676.9273681640625, 815.5572509765625, 678.5550537109375, 232.51712036132812, 944.30615234375, 539.994873046875, 422.6651306152344, -404.5207824707031, 417.697265625, 2249.103759765625, 427.7083740234375, 368.36859130859375, 306.7127990722656, -222.6220245361328, 498.8492431640625, 582.6455078125, 541.9190673828125, 561.062744140625, -28.890281677246094, -703.8508911132812, -695.3194580078125, -502.9725646972656, 7.242332458496094, 2108.15380859375, -212.27490234375, 246.09097290039062, 410.9778747558594, 379.53326416015625, 918.8564453125, 575.6553955078125, 1457.38623046875, 1646.337890625, 180.35601806640625, 911.0230712890625, 377.1062927246094, 860.2811279296875, 13.55117416381836, 1599.0040283203125, -150.70372009277344, -62.266510009765625, 18.23540496826172, 1194.013427734375, 608.87744140625, 1026.5382080078125, 86.46940612792969, 313.694580078125, 1.719146728515625, 889.8173217773438, 792.45458984375, -1008.0779418945312, 684.64111328125, 254.90689086914062, -633.3596801757812, 1162.143798828125, -740.1514892578125, -386.3141174316406, 36.48737335205078, 398.67889404296875, 1322.681640625, 1763.3609619140625, 1826.0538330078125, 89.6520004272461, -208.0677490234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000554.npy"}
|
|
{"epoch": 0.8135095447870778, "step": 555, "batch_size": 64, "mean": 491.5250244140625, "std": 749.5938110351562, "min": -1247.889404296875, "p10": -207.0268829345703, "median": 458.08164978027344, "p90": 1569.1054443359376, "max": 2411.475830078125, "pos_frac": 0.796875, "sample": [587.963134765625, 440.31048583984375, 1453.67724609375, 299.6122131347656, 638.6954956054688, 1252.7481689453125, 488.55523681640625, 259.6259460449219, -167.19049072265625, 180.47787475585938, -162.55125427246094, 853.6185302734375, 785.9228515625, 488.2962646484375, 17.34112548828125, 67.57267761230469, 186.16238403320312, -219.59201049804688, 2411.475830078125, 1166.7889404296875, 165.0869140625, 2167.31640625, 49.00696563720703, 539.5457763671875, 538.1559448242188, 121.70846557617188, 843.1124267578125, 35.50422668457031, 494.9866027832031, 78.43223571777344, 691.1738891601562, 1002.980224609375, 598.0340576171875, 481.7334289550781, 19.863037109375, 170.5267791748047, 743.9686279296875, -579.636474609375, 129.06167602539062, -177.708251953125, -987.6138916015625, 1158.6375732421875, 1781.6968994140625, 2352.298583984375, -536.9508056640625, 476.7707824707031, 2004.0323486328125, 123.34678649902344, 475.8528137207031, 70.97821044921875, 1564.2139892578125, -536.7680053710938, 1571.2017822265625, -1247.889404296875, -117.65045928955078, -147.04922485351562, 1782.907470703125, 553.4312744140625, -151.22970581054688, 171.9832763671875, 1178.1494140625, 330.1108703613281, -225.5068817138672, 670.28369140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000555.npy"}
|
|
{"epoch": 0.8149779735682819, "step": 556, "batch_size": 64, "mean": 300.2801208496094, "std": 445.6376647949219, "min": -444.3118896484375, "p10": -230.37686462402343, "median": 254.6309814453125, "p90": 956.1118713378908, "max": 1415.3575439453125, "pos_frac": 0.71875, "sample": [1261.0096435546875, 310.663818359375, 116.89901733398438, -237.551513671875, 292.9202575683594, 142.28338623046875, 614.5283813476562, 411.5885925292969, -200.9176483154297, 328.0389709472656, 102.6393051147461, 737.6917724609375, 283.59844970703125, -261.2091979980469, 249.5394287109375, 507.97149658203125, 1148.375244140625, -31.9801025390625, 698.2818603515625, -128.3638916015625, -444.3118896484375, -90.68010711669922, -387.6736145019531, 148.75367736816406, -415.3643798828125, -170.29266357421875, 438.6305847167969, 3.4730682373046875, 896.7881469726562, -328.8447265625, 371.42376708984375, 373.77435302734375, 312.4430236816406, 110.17017364501953, 851.7890014648438, -68.1422348022461, 421.21734619140625, 198.5607147216797, 234.99234008789062, -32.713157653808594, -89.966064453125, 830.842529296875, 183.62745666503906, 123.94593048095703, -213.63601684570312, 501.29388427734375, 1056.001953125, 479.64019775390625, 984.3713989257812, 66.1999282836914, 238.37237548828125, -147.3919677734375, 917.1633911132812, 306.66650390625, 1365.5950927734375, -14.689384460449219, 972.8040771484375, 1415.3575439453125, 488.1686096191406, 436.87469482421875, 385.4909362792969, -330.9871826171875, 259.7225341796875, 232.45965576171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000556.npy"}
|
|
{"epoch": 0.8164464023494861, "step": 557, "batch_size": 64, "mean": 433.4267883300781, "std": 699.504150390625, "min": -1455.416748046875, "p10": -315.915869140625, "median": 321.45404052734375, "p90": 1316.5631835937502, "max": 2581.6357421875, "pos_frac": 0.765625, "sample": [-1455.416748046875, 78.06394958496094, -1026.26953125, 897.1573486328125, 342.06719970703125, 270.6610107421875, 115.94187927246094, 532.221923828125, 678.7201538085938, 96.17868041992188, 944.5020141601562, 1341.0029296875, 999.0634155273438, 55.4454345703125, 111.77191162109375, 300.84088134765625, 471.05780029296875, 849.3474731445312, -85.72247314453125, 100.79285430908203, 80.30702209472656, 1259.537109375, 1659.31591796875, 650.9685668945312, -18.55157470703125, 550.9232177734375, 1.6832275390625, -336.31390380859375, -222.1846923828125, 753.9906005859375, 150.34564208984375, 687.7049560546875, 1118.3509521484375, 470.3543701171875, 259.2738037109375, 223.7104034423828, 432.0481872558594, -564.4736938476562, 885.6127319335938, 569.6510620117188, 741.5371704101562, -319.23004150390625, 477.9671630859375, 135.4947967529297, 146.8380889892578, -488.19873046875, 744.236083984375, 1630.84814453125, -346.3308410644531, 2581.6357421875, -72.96220397949219, 2265.751708984375, 1596.11669921875, 155.14279174804688, 1068.204833984375, -19.260696411132812, -251.2509765625, 1441.9097900390625, -308.18280029296875, 1166.9512939453125, -66.19173431396484, 496.8206481933594, 447.586669921875, 284.1986083984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000557.npy"}
|
|
{"epoch": 0.8179148311306902, "step": 558, "batch_size": 64, "mean": 589.447265625, "std": 618.3509521484375, "min": -1575.6715087890625, "p10": -93.48960571289062, "median": 495.21099853515625, "p90": 1367.0212280273438, "max": 1856.903564453125, "pos_frac": 0.828125, "sample": [405.07208251953125, 1108.025146484375, 236.13243103027344, 1328.80517578125, 178.36306762695312, 339.9713439941406, 539.5537109375, 17.394790649414062, 905.7674560546875, -327.4811706542969, -146.09371948242188, 1196.115478515625, 1291.2784423828125, 1157.1751708984375, 471.42535400390625, 1149.3033447265625, 667.0209350585938, 320.9509582519531, 1727.9578857421875, 1601.885986328125, -239.24029541015625, 385.4955139160156, 347.583984375, 1272.570556640625, -30.69916534423828, 698.3583984375, 1383.3995361328125, 466.32879638671875, 1407.193359375, 525.078125, 385.4077453613281, 600.1517333984375, 950.7102661132812, 1243.5985107421875, 1387.7701416015625, 1015.781005859375, -1575.6715087890625, -95.66134643554688, 383.4581604003906, 142.85353088378906, 372.8977966308594, 5.134920120239258, 135.57786560058594, 404.33013916015625, 495.08746337890625, -66.68916320800781, -120.57368469238281, 1856.903564453125, 1140.9449462890625, -0.2213726043701172, 783.1199951171875, 1094.1383056640625, 626.0047607421875, 478.6346740722656, 1031.9716796875, -632.0043334960938, 771.008544921875, 330.8305969238281, 495.33453369140625, -88.42221069335938, 219.75238037109375, 1503.16162109375, 1005.058349609375, 1059.552734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000558.npy"}
|
|
{"epoch": 0.8193832599118943, "step": 559, "batch_size": 64, "mean": 497.17767333984375, "std": 637.0014038085938, "min": -827.7745361328125, "p10": -110.45960922241211, "median": 337.32310485839844, "p90": 1261.9763916015627, "max": 2372.28759765625, "pos_frac": 0.8125, "sample": [983.3463745117188, 1349.4493408203125, 662.4190673828125, 675.33056640625, 124.23108673095703, 50.322906494140625, 147.16725158691406, 171.8334503173828, 1290.920166015625, 2372.28759765625, 1178.38330078125, 404.189697265625, 1109.72509765625, 52.041107177734375, 114.87332153320312, 776.459716796875, 1019.5462646484375, 1383.83935546875, -804.301025390625, -401.11749267578125, 1194.44091796875, 327.2183532714844, 218.45736694335938, 440.3839111328125, 94.31318664550781, 1191.51220703125, -141.69711303710938, -56.23785400390625, 1148.2791748046875, 244.37689208984375, -19.99614715576172, 319.3388671875, 114.9411392211914, 26.70893096923828, 180.29185485839844, -110.80839538574219, -29.61568832397461, -33.41997528076172, 238.46568298339844, 171.54800415039062, 1927.126953125, 559.6919555664062, 1019.81494140625, 107.99942779541016, -827.7745361328125, 757.6871948242188, 738.795654296875, 595.9775390625, 1125.798095703125, -109.6457748413086, 526.3001098632812, 270.28472900390625, 347.4278564453125, 1105.043701171875, 1505.9007568359375, 1820.27783203125, -423.37335205078125, 780.5863647460938, 1038.1446533203125, 466.21893310546875, 38.84485626220703, 73.25572204589844, -543.4700317382812, 739.0069580078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000559.npy"}
|
|
{"epoch": 0.8208516886930984, "step": 560, "batch_size": 64, "mean": 357.69287109375, "std": 565.0054931640625, "min": -996.400634765625, "p10": -245.65267333984374, "median": 303.09552001953125, "p90": 1133.5769165039064, "max": 2097.720947265625, "pos_frac": 0.734375, "sample": [2097.720947265625, -325.7869873046875, -95.31339263916016, 1066.47509765625, -394.8238830566406, 651.9838256835938, 46.444725036621094, -141.86663818359375, 78.88043212890625, 374.4868469238281, 240.1785888671875, 166.73007202148438, 235.3343505859375, 533.409423828125, 1427.1424560546875, -244.77630615234375, 754.884765625, 1275.8941650390625, 508.66534423828125, 1018.2033081054688, 764.4158935546875, -79.945068359375, 190.16567993164062, 1298.2156982421875, 890.7634887695312, -610.0862426757812, 9.581363677978516, 1058.928955078125, 461.539794921875, -167.87799072265625, 802.56689453125, -203.1394805908203, 25.875200271606445, 554.131103515625, 523.3094482421875, -474.00537109375, 108.58338165283203, 1343.7366943359375, 259.5227966308594, 519.6653442382812, 517.1514892578125, -148.9063720703125, 454.9129333496094, 212.02902221679688, 590.9077758789062, 1270.9249267578125, -121.27659606933594, -29.97148895263672, -105.65669250488281, 1162.3348388671875, 597.5421752929688, 215.13980102539062, 592.54345703125, 231.05320739746094, 364.58709716796875, 346.6682434082031, -246.02825927734375, 499.47454833984375, 776.7636108398438, 630.0510864257812, -668.5167236328125, 102.12609100341797, -996.400634765625, 95.07427978515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000560.npy"}
|
|
{"epoch": 0.8223201174743024, "step": 561, "batch_size": 64, "mean": 290.80755615234375, "std": 626.5330200195312, "min": -2158.138427734375, "p10": -301.5412231445312, "median": 241.5041961669922, "p90": 1074.555163574219, "max": 1797.16650390625, "pos_frac": 0.734375, "sample": [615.8914184570312, 433.43890380859375, 164.72933959960938, 26.20345115661621, -965.4077758789062, 1351.2918701171875, 755.2005004882812, 61.89152526855469, 604.3289184570312, 1099.5650634765625, 150.4874267578125, 499.8882751464844, 1797.16650390625, -192.80177307128906, 208.43368530273438, 181.66714477539062, 139.87884521484375, -192.43435668945312, 332.8135986328125, 428.0658264160156, 1250.7113037109375, 194.05337524414062, 536.4509887695312, 385.2052917480469, -273.8639221191406, 150.75962829589844, 807.1964111328125, 348.3386535644531, -55.89008331298828, 1612.5487060546875, 765.86181640625, 1228.4613037109375, 0.7635574340820312, 256.0184020996094, 906.917236328125, -564.5177612304688, -313.4029235839844, 485.80975341796875, 20.54621124267578, 619.972900390625, 156.4613800048828, -395.6934509277344, -181.5498046875, 738.0253295898438, 477.9676208496094, -152.6214599609375, 234.28814697265625, 783.8654174804688, 1016.19873046875, 248.72024536132812, -2158.138427734375, 1536.8876953125, 93.73872375488281, -142.76373291015625, 432.389404296875, -110.00994873046875, 433.54498291015625, -41.59745788574219, 637.7008056640625, 329.4111328125, -776.0850830078125, 149.53848266601562, -391.5860900878906, -169.24929809570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000561.npy"}
|
|
{"epoch": 0.8237885462555066, "step": 562, "batch_size": 64, "mean": 415.5235290527344, "std": 612.984619140625, "min": -1267.4881591796875, "p10": -214.06683807373045, "median": 381.84637451171875, "p90": 1015.738592529297, "max": 2623.953857421875, "pos_frac": 0.78125, "sample": [613.6571655273438, 2623.953857421875, 1377.3408203125, 515.568359375, 132.01229858398438, 396.4083557128906, 766.8854370117188, 278.8541259765625, 928.9521484375, 731.796142578125, 762.6229248046875, 909.1941528320312, 228.53414916992188, 498.9238586425781, 1074.3541259765625, -129.22727966308594, 249.65289306640625, 901.14453125, 205.92665100097656, 417.2326354980469, 182.1884002685547, 547.1585083007812, 612.2214965820312, -284.4851379394531, 28.61266326904297, 838.145263671875, 1054.0753173828125, -53.32965087890625, 1020.980224609375, 242.7193603515625, -225.5654296875, -199.01806640625, -92.73805236816406, 736.126708984375, -148.4315185546875, 2329.312255859375, -205.40411376953125, 458.006591796875, 630.9888916015625, 986.3609008789062, 703.9898071289062, 218.09104919433594, 647.3856201171875, 259.4293518066406, -1267.4881591796875, 367.2843933105469, -217.77943420410156, 863.4542236328125, 428.6602783203125, 1003.5081176757812, 14.892276763916016, -67.01170349121094, 116.8498306274414, 74.80516052246094, 583.6929931640625, 343.60955810546875, -597.1651000976562, 28.26569175720215, 145.9051971435547, -565.93603515625, 100.52760314941406, -457.06201171875, 1101.2603759765625, 822.623291015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000562.npy"}
|
|
{"epoch": 0.8252569750367107, "step": 563, "batch_size": 64, "mean": 322.9136657714844, "std": 727.1387939453125, "min": -2543.427978515625, "p10": -507.6020172119141, "median": 361.9321594238281, "p90": 1049.5878112792973, "max": 1800.484619140625, "pos_frac": 0.78125, "sample": [195.35293579101562, -2543.427978515625, -658.5095825195312, 428.5937805175781, 744.9075317382812, 90.1689453125, 1367.1435546875, 39.749420166015625, 311.71234130859375, 622.2017822265625, 908.3131103515625, 185.2857666015625, -523.1466674804688, 874.764892578125, 810.2653198242188, 690.27490234375, 108.17829895019531, 1800.484619140625, 760.2151489257812, -179.48081970214844, 649.120849609375, 142.7103271484375, -198.00332641601562, 325.611328125, 952.0043334960938, 4.299396514892578, 721.4122314453125, 325.92755126953125, 1218.3294677734375, 945.7196044921875, 127.53182983398438, 681.7351684570312, -1707.1668701171875, 562.5423583984375, -114.81983947753906, 137.2037353515625, 43.38467788696289, 699.8319702148438, 551.8800048828125, 358.964111328125, 925.1865234375, 580.4298706054688, 1286.978759765625, 292.09588623046875, 1091.4093017578125, -214.047607421875, -1291.4541015625, 1786.34228515625, -497.39752197265625, 1423.6688232421875, -23.12728500366211, 722.1444702148438, 80.96558380126953, -651.5816650390625, 764.866943359375, -511.9753723144531, 467.97747802734375, -276.892822265625, 91.08616638183594, 565.6778564453125, 364.90020751953125, 399.0166320800781, 268.71929931640625, 560.21923828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000563.npy"}
|
|
{"epoch": 0.8267254038179148, "step": 564, "batch_size": 64, "mean": 394.5144348144531, "std": 520.6907958984375, "min": -521.251220703125, "p10": -221.63592987060545, "median": 312.4113311767578, "p90": 1096.330364990235, "max": 1982.6839599609375, "pos_frac": 0.796875, "sample": [358.36639404296875, 505.9044494628906, 768.9666748046875, 162.2593231201172, 636.3931884765625, 577.0772705078125, 674.14111328125, -299.3273010253906, 267.1108093261719, -521.251220703125, 518.1236572265625, 738.4266967773438, -297.51678466796875, 339.2051086425781, 946.8407592773438, 33.424072265625, -172.43238830566406, 1299.3521728515625, 1160.3973388671875, 472.2520751953125, 314.1167297363281, 267.352783203125, 189.8993377685547, -203.91526794433594, -442.12835693359375, 354.7521057128906, -229.23049926757812, 180.36270141601562, 23.019378662109375, -397.58966064453125, 336.94635009765625, 1792.7388916015625, 707.8428344726562, -103.20543670654297, 40.677040100097656, 359.42401123046875, 151.5451202392578, 129.77682495117188, 375.947021484375, -37.11881637573242, 605.29296875, 1982.6839599609375, -56.919677734375, 641.2166748046875, 775.7149047851562, 762.8629150390625, 1352.9329833984375, 777.1563720703125, 1359.8720703125, 310.7059326171875, 12.285881042480469, 657.2208862304688, 44.661659240722656, 85.62626647949219, 125.35047912597656, 296.95599365234375, 233.19911193847656, 870.415283203125, 309.9841003417969, -78.07313537597656, 1373.392578125, -272.2886962890625, 879.606689453125, 220.1390380859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000564.npy"}
|
|
{"epoch": 0.8281938325991189, "step": 565, "batch_size": 64, "mean": 425.871826171875, "std": 733.3487548828125, "min": -1156.9530029296875, "p10": -442.4836364746093, "median": 270.57435607910156, "p90": 1231.387341308594, "max": 2558.281005859375, "pos_frac": 0.703125, "sample": [75.28916931152344, 1181.79833984375, 1308.11572265625, -1156.9530029296875, 240.15008544921875, 864.7788696289062, 2077.96240234375, -37.83174133300781, -164.12818908691406, -336.58868408203125, 697.368408203125, 1068.8970947265625, -274.7757568359375, -931.8466796875, 223.24322509765625, -487.8671875, -492.0749206542969, -65.56683349609375, -98.73779296875, 467.7093200683594, 1121.5511474609375, 152.12289428710938, 214.13308715820312, 1503.8272705078125, -561.5771484375, 9.830940246582031, 189.18701171875, 283.8932189941406, -606.6202392578125, 999.4319458007812, 633.6184692382812, 258.8524169921875, 603.7007446289062, 825.87548828125, 760.7320556640625, -2.3686065673828125, 883.15380859375, 597.4627075195312, -273.6283264160156, -108.80590057373047, -73.337646484375, 971.1555786132812, 734.2279663085938, 1811.6168212890625, 888.0651245117188, 2558.281005859375, 533.6365966796875, 2299.6005859375, 1179.17138671875, -513.7391357421875, -183.4615478515625, 12.933822631835938, 911.5269775390625, 493.7231140136719, 857.148193359375, 282.2962951660156, 1085.064208984375, 305.3200988769531, 102.1541748046875, 31.994163513183594, -297.19512939453125, 250.62673950195312, 119.03251647949219, 1252.6397705078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000565.npy"}
|
|
{"epoch": 0.8296622613803231, "step": 566, "batch_size": 64, "mean": 476.88720703125, "std": 758.7432250976562, "min": -1359.807373046875, "p10": -376.5286499023438, "median": 543.9501037597656, "p90": 1471.5723022460943, "max": 2248.64990234375, "pos_frac": 0.703125, "sample": [-217.802734375, -790.6260986328125, 634.6210327148438, 2049.458251953125, 499.8834533691406, -116.77511596679688, 763.507080078125, 225.90145874023438, -1013.7245483398438, 540.7199096679688, 606.8499755859375, -243.5937957763672, -24.527374267578125, 678.3674926757812, 929.6915893554688, 424.60833740234375, 713.0186767578125, 81.45323181152344, 11.807575225830078, -1359.807373046875, 1003.4382934570312, 659.2338256835938, 547.1802978515625, 41.41656494140625, 1309.3543701171875, -561.819580078125, 499.44146728515625, 1072.969970703125, -91.51036071777344, 1135.3765869140625, 832.5238647460938, 1046.8336181640625, 697.649658203125, 101.98365783691406, 1050.00390625, -302.6352844238281, -110.51602172851562, 1949.1251220703125, 1003.1403198242188, -189.8208770751953, -371.37811279296875, 812.3526611328125, 599.7073364257812, 1518.453857421875, 1362.1820068359375, 1789.284912109375, 2248.64990234375, 628.2542724609375, -920.0093383789062, 408.8258056640625, -636.0216064453125, 1660.858154296875, 805.7726440429688, 1539.776123046875, -63.213279724121094, -378.73602294921875, 1103.0872802734375, 386.82452392578125, 1151.8720703125, -371.17547607421875, -1.3901290893554688, 27.5699462890625, 653.9216918945312, 478.9113464355469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000566.npy"}
|
|
{"epoch": 0.8311306901615272, "step": 567, "batch_size": 64, "mean": 364.84234619140625, "std": 479.86114501953125, "min": -1061.8560791015625, "p10": -82.09771308898922, "median": 295.373046875, "p90": 977.8046325683595, "max": 1804.6904296875, "pos_frac": 0.875, "sample": [144.08807373046875, 45.420799255371094, 1283.1480712890625, 602.94921875, 386.96295166015625, 600.962158203125, 137.29998779296875, -99.67569732666016, 1040.1136474609375, 175.13967895507812, 985.4866943359375, 89.10858917236328, 1082.17138671875, 280.8533935546875, 118.07839965820312, 280.71478271484375, 1247.504638671875, 588.5369262695312, 604.0741577148438, 45.2228889465332, 603.8561401367188, 191.95223999023438, -317.6310729980469, 617.4744262695312, 307.94146728515625, 658.6990356445312, 730.9236450195312, 1480.10888671875, 12.26411247253418, 663.7965087890625, 39.65129852294922, 300.27386474609375, 397.36407470703125, -1061.8560791015625, 272.365966796875, -41.08241653442383, 39.75784683227539, 753.9652099609375, 1804.6904296875, 796.4879150390625, 176.88400268554688, -836.6469116210938, 261.08782958984375, 121.1862564086914, 237.82801818847656, 959.8798217773438, 290.47222900390625, 303.4580078125, 422.90130615234375, 382.796630859375, 141.90159606933594, 866.526123046875, 22.52819061279297, 459.7042236328125, -158.08949279785156, 284.4071044921875, 111.5892105102539, 318.0274658203125, -115.7198257446289, 250.38507080078125, 415.81207275390625, 537.8176879882812, -472.74871826171875, 478.75634765625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000567.npy"}
|
|
{"epoch": 0.8325991189427313, "step": 568, "batch_size": 64, "mean": 481.43841552734375, "std": 534.9915771484375, "min": -841.20751953125, "p10": -83.31899223327635, "median": 384.1775360107422, "p90": 1135.2046264648438, "max": 1940.77099609375, "pos_frac": 0.875, "sample": [695.5301513671875, -757.6051025390625, 1264.94873046875, 924.10205078125, 494.59136962890625, 355.8132629394531, 610.6548461914062, 732.345703125, 386.5599060058594, -841.20751953125, -267.3951416015625, -125.4667739868164, 582.6368408203125, 1834.2989501953125, 211.94210815429688, 510.8014831542969, 230.69808959960938, 171.48638916015625, 213.03660583496094, -93.14503479003906, 556.40576171875, 374.3148498535156, 973.9144287109375, 988.6926879882812, 566.7821044921875, 312.89263916015625, 125.92330169677734, 1940.77099609375, 273.8937072753906, 359.4218444824219, 867.3018798828125, 653.2177734375, 1140.0499267578125, 660.8331909179688, 1513.4569091796875, 409.9891052246094, 205.69456481933594, 620.5978393554688, 300.0257873535156, 861.8280029296875, 1022.9963989257812, 61.87314224243164, 756.4208984375, 893.9119873046875, 80.22441101074219, 352.66485595703125, 142.56637573242188, -60.39155960083008, 348.48431396484375, 1123.89892578125, 74.0719223022461, 1258.244140625, 381.795166015625, -633.645263671875, 234.2515869140625, -345.905517578125, 1212.24560546875, 1017.4331665039062, 214.04208374023438, 622.9640502929688, 172.7412109375, 756.938232421875, 141.07086181640625, 142.5283660888672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000568.npy"}
|
|
{"epoch": 0.8340675477239354, "step": 569, "batch_size": 64, "mean": 409.6417236328125, "std": 701.2659301757812, "min": -1879.828857421875, "p10": -260.9833984375, "median": 337.5604705810547, "p90": 1336.0948730468751, "max": 2278.959716796875, "pos_frac": 0.78125, "sample": [-578.4137573242188, 54.675567626953125, 1883.68994140625, 187.74777221679688, 496.430419921875, -227.63475036621094, 715.1581420898438, 132.44656372070312, -513.5473022460938, 250.1419219970703, 805.8899536132812, 1355.64697265625, 503.05126953125, 551.7141723632812, 375.56298828125, -627.390869140625, 247.0337677001953, 933.3366088867188, -258.3299865722656, -28.069726943969727, -1299.63232421875, 1687.2889404296875, 127.34591674804688, -229.16680908203125, 56.6011962890625, 678.4440307617188, 1071.5565185546875, 69.94920349121094, 42.892120361328125, 99.157958984375, 1355.3057861328125, 899.385498046875, 631.053466796875, -31.934173583984375, 1342.795654296875, 403.82135009765625, 84.75689697265625, 366.2422790527344, 776.3237915039062, -1879.828857421875, 570.6248779296875, 166.63258361816406, -2.849367141723633, 1125.398681640625, 903.0336303710938, 140.81649780273438, 883.4136352539062, 152.6683349609375, -262.1205749511719, 748.294677734375, -341.0252685546875, 1810.72998046875, 68.09087371826172, 486.5356140136719, 1004.17041015625, -29.56775665283203, 137.07269287109375, 677.570556640625, 308.878662109375, 168.83282470703125, 676.3486328125, 712.6041259765625, 1320.459716796875, 2278.959716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000569.npy"}
|
|
{"epoch": 0.8355359765051396, "step": 570, "batch_size": 64, "mean": 374.2774658203125, "std": 598.9781494140625, "min": -901.3948974609375, "p10": -237.0114028930664, "median": 291.8476867675781, "p90": 1206.1352050781256, "max": 2597.140380859375, "pos_frac": 0.75, "sample": [315.7825927734375, 1070.4232177734375, 409.52978515625, 1534.0987548828125, 2597.140380859375, 119.66828918457031, 758.6392211914062, 1392.405029296875, 1500.0836181640625, 297.18267822265625, 354.30413818359375, 985.4174194335938, 251.56161499023438, 489.50390625, 26.35011863708496, 445.7933349609375, 861.9533081054688, 431.77862548828125, 768.7467041015625, -205.66416931152344, 772.7699584960938, -246.90554809570312, -565.1295166015625, -737.1978759765625, 142.52801513671875, 909.3728637695312, -532.8028564453125, -221.03518676757812, 298.455078125, 448.89727783203125, 518.013916015625, 296.4080810546875, -182.9464111328125, -452.33587646484375, 229.93914794921875, 261.95172119140625, 235.97703552246094, 458.1348876953125, 79.83940887451172, 1264.2974853515625, 514.72216796875, -196.27545166015625, 225.71676635742188, 191.12982177734375, 1276.310791015625, 287.28729248046875, 229.69947814941406, 714.0625, 1050.472412109375, 760.6368408203125, -80.09114074707031, 210.15097045898438, 557.272705078125, 403.22021484375, -66.19849395751953, -901.3948974609375, 273.6090087890625, -25.963180541992188, 160.4578399658203, -147.58294677734375, -174.14974975585938, 1369.2020263671875, 182.39242553710938, -243.8583526611328], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000570.npy"}
|
|
{"epoch": 0.8370044052863436, "step": 571, "batch_size": 64, "mean": 441.9400939941406, "std": 676.9304809570312, "min": -1198.64501953125, "p10": -393.48218383789055, "median": 426.2853546142578, "p90": 1314.4791259765625, "max": 2164.349365234375, "pos_frac": 0.78125, "sample": [560.3220825195312, 756.334228515625, 125.10853576660156, 878.177001953125, -221.01943969726562, 546.2217407226562, 70.96754455566406, 963.042724609375, 407.89617919921875, 1118.7274169921875, 1998.022216796875, -418.4427185058594, -255.92282104492188, 1627.4737548828125, 198.0320281982422, 540.9692993164062, 814.8438720703125, 767.069091796875, 444.6745300292969, 778.9146118164062, 2164.349365234375, 1805.2451171875, 725.2761840820312, 1141.830078125, -335.2409362792969, 36.7720947265625, 772.2911987304688, 370.8477783203125, 1451.2557373046875, 1170.7362060546875, 253.68235778808594, -103.18426513671875, 385.9552307128906, 1654.54541015625, 898.470703125, -96.69358825683594, -558.354248046875, 151.25466918945312, 1290.3157958984375, 532.29931640625, 850.789794921875, -452.4691467285156, 1324.8348388671875, 548.42138671875, 453.92138671875, 95.98603057861328, 579.8519897460938, 192.3850860595703, 675.3731079101562, -33.67079162597656, -554.9570922851562, -1198.64501953125, 309.052001953125, 70.72232055664062, -713.4501342773438, 312.97857666015625, 213.52537536621094, 497.9919128417969, -916.7510986328125, 62.05192184448242, -160.63143920898438, 612.4307861328125, 23.352081298828125, 78.00732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000571.npy"}
|
|
{"epoch": 0.8384728340675477, "step": 572, "batch_size": 64, "mean": 461.7014465332031, "std": 763.6243896484375, "min": -1400.8536376953125, "p10": -313.31953125, "median": 371.12451171875, "p90": 1419.4121582031255, "max": 3114.997314453125, "pos_frac": 0.71875, "sample": [-288.4643859863281, -172.5117950439453, -325.90411376953125, 527.278076171875, 1879.8599853515625, 26.68596649169922, 74.90310668945312, -2.7791366577148438, -24.76003646850586, 205.47232055664062, 11.954366683959961, 1297.25634765625, 877.7000732421875, 333.78271484375, 593.1782836914062, 1159.8497314453125, 966.074462890625, 1229.1873779296875, 1471.7646484375, 649.677978515625, 262.94256591796875, 743.022705078125, -67.67192077636719, 1078.5723876953125, -945.9639282226562, 106.76856994628906, 485.35504150390625, -314.899658203125, -259.4750671386719, 927.9746704101562, -499.3311767578125, 529.977783203125, 467.63702392578125, 36.869049072265625, 254.51571655273438, 3114.997314453125, 93.88127899169922, 384.1043395996094, -301.79461669921875, -616.8578491210938, 1537.0238037109375, 131.5619659423828, -309.632568359375, 576.319580078125, -218.76516723632812, -410.3190612792969, 1172.1829833984375, 803.3600463867188, 750.2965087890625, -13.108833312988281, 883.8756103515625, 318.96588134765625, 392.9934997558594, 1988.080810546875, 197.90866088867188, 358.1446838378906, 633.6065673828125, 2108.855224609375, -16.3323974609375, -1400.8536376953125, 626.2842407226562, 1130.481201171875, 1671.9586181640625, 665.1743774414062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000572.npy"}
|
|
{"epoch": 0.8399412628487518, "step": 573, "batch_size": 64, "mean": 397.1726379394531, "std": 571.3529052734375, "min": -683.7924194335938, "p10": -162.93608703613282, "median": 307.73211669921875, "p90": 970.231756591797, "max": 2552.51513671875, "pos_frac": 0.78125, "sample": [662.5989379882812, -101.63604736328125, 9.878927230834961, -3.90899658203125, -21.443313598632812, 2552.51513671875, 915.3222045898438, -85.11620330810547, 164.07861328125, 201.5098114013672, 431.7277526855469, 251.15065002441406, -683.7924194335938, 2102.4599609375, 385.6981201171875, 509.7904052734375, 1771.2314453125, -406.69256591796875, 36.60053253173828, -161.75836181640625, -514.3322143554688, 1100.812255859375, 796.1416015625, 541.1318359375, 66.79999542236328, 172.05316162109375, 908.5421752929688, 999.0784301757812, 141.688232421875, 1168.0059814453125, -288.774169921875, 905.3262329101562, 347.9628601074219, 546.2111206054688, 616.08251953125, -326.0085754394531, 347.48046875, 84.62469482421875, 784.6591796875, 1.5769119262695312, 935.586669921875, 149.11105346679688, 291.53240966796875, -97.2298812866211, 374.9677429199219, 82.74308776855469, 323.93182373046875, -29.970775604248047, 904.9322509765625, -231.95855712890625, 526.148193359375, 144.92941284179688, 135.90203857421875, 750.5255126953125, 382.179443359375, 65.0398178100586, -163.44082641601562, 823.420166015625, 586.8304443359375, 544.9083251953125, 148.5570068359375, 731.7709350585938, 985.0796508789062, 124.27763366699219], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000573.npy"}
|
|
{"epoch": 0.8414096916299559, "step": 574, "batch_size": 64, "mean": 435.4447021484375, "std": 592.907958984375, "min": -964.9228515625, "p10": -275.53297119140626, "median": 371.8797302246094, "p90": 1277.0200805664065, "max": 2011.835205078125, "pos_frac": 0.765625, "sample": [236.34158325195312, 770.7737426757812, -97.36581420898438, -157.781982421875, 1220.536865234375, 63.42481231689453, 112.4822998046875, 1342.5140380859375, 1051.1962890625, 752.2637939453125, 6.582328796386719, 1573.4635009765625, 1098.2789306640625, 844.1633911132812, 160.15792846679688, 673.7964477539062, 96.11516571044922, 433.34674072265625, -664.013427734375, 2011.835205078125, 553.274658203125, 640.0907592773438, 475.29913330078125, 654.3084716796875, 186.48236083984375, 749.4594116210938, 387.7563781738281, -390.9698486328125, 97.97563171386719, -262.0009765625, 166.64797973632812, -449.3101501464844, 239.108642578125, 1301.2271728515625, -79.96332550048828, 758.4920654296875, -502.0036315917969, 843.802490234375, 235.06394958496094, 544.8193359375, 944.9201049804688, -281.3323974609375, 746.9328002929688, 881.42431640625, 638.92138671875, 356.0030822753906, 459.7406921386719, -31.045791625976562, -964.9228515625, -313.2892761230469, 338.0817565917969, 113.84273529052734, -45.3514404296875, 1466.861328125, -6.810821533203125, 804.9599609375, 342.7672424316406, 1457.8109130859375, 168.4349822998047, 614.7279663085938, 90.02974700927734, 1755.359375, 658.5471801757812, -5.824575424194336], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000574.npy"}
|
|
{"epoch": 0.8428781204111601, "step": 575, "batch_size": 64, "mean": 408.8327941894531, "std": 692.0982055664062, "min": -1272.97509765625, "p10": -320.3971405029297, "median": 415.3527374267578, "p90": 1332.0375366210942, "max": 2188.193603515625, "pos_frac": 0.765625, "sample": [1465.424560546875, 228.1717071533203, 814.9505004882812, 420.80548095703125, 551.3151245117188, -888.767822265625, 591.6483154296875, 499.8295593261719, 221.30471801757812, 157.1644744873047, 41.98218536376953, -41.273963928222656, 409.8999938964844, 1070.9566650390625, 586.661865234375, 321.6205749511719, -132.82443237304688, 796.3422241210938, 233.2353057861328, 314.25885009765625, 365.1969909667969, 80.4813232421875, 1372.2579345703125, 643.0272827148438, 611.4755859375, 376.2348327636719, -1247.865478515625, 290.3222351074219, -740.42041015625, 567.1141357421875, -927.0393676757812, 1238.18994140625, 879.6681518554688, 1385.8369140625, 260.66162109375, -145.3790740966797, -332.25958251953125, -66.8990707397461, 1396.57275390625, 1031.083984375, -221.46014404296875, 246.032470703125, -292.7181091308594, 1030.09912109375, 1121.748291015625, -825.1250610351562, 520.4028930664062, 548.7919921875, 833.9404907226562, 2188.193603515625, 533.103271484375, 940.91162109375, 436.1822814941406, -28.29052734375, 1541.2655029296875, 137.87576293945312, 576.900390625, -1272.97509765625, 2074.338134765625, 76.48139953613281, -173.65338134765625, 682.8416748046875, 139.5484161376953, 649.8966064453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000575.npy"}
|
|
{"epoch": 0.8443465491923642, "step": 576, "batch_size": 64, "mean": 310.19610595703125, "std": 582.2504272460938, "min": -868.3506469726562, "p10": -375.238232421875, "median": 207.0791244506836, "p90": 1057.5937866210938, "max": 1702.5860595703125, "pos_frac": 0.734375, "sample": [209.99623107910156, 1070.5836181640625, -105.47615051269531, -6.936397552490234, -381.5882568359375, -198.58961486816406, -173.29588317871094, 1662.43798828125, 895.5691528320312, 161.654052734375, 256.9646301269531, 1597.053955078125, 113.656005859375, 106.28175354003906, 779.0325317382812, 467.09991455078125, 418.6155700683594, -868.3506469726562, 288.5549011230469, 712.655029296875, 537.4078979492188, 1098.317626953125, 620.5294189453125, 829.294189453125, 16.948223114013672, 1425.828369140625, -99.71403503417969, -505.26727294921875, -189.78848266601562, 710.7003173828125, -252.6876678466797, 221.0395050048828, 49.127655029296875, 37.75945281982422, 880.8800048828125, 515.5045776367188, 1702.5860595703125, 283.92327880859375, 250.63153076171875, 721.8125, 62.59375762939453, 962.08642578125, 183.60623168945312, 339.9759521484375, 79.57881927490234, -821.0774536132812, -59.22923278808594, 689.94091796875, -622.3385009765625, 664.3286743164062, 416.3743896484375, 1141.4913330078125, 1027.2841796875, 927.1409301757812, -853.0106811523438, -393.6453552246094, 143.50177001953125, 192.08389282226562, 204.16201782226562, 129.7833251953125, -360.4215087890625, 147.24679565429688, -329.06439208984375, 119.40848541259766], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000576.npy"}
|
|
{"epoch": 0.8458149779735683, "step": 577, "batch_size": 64, "mean": 401.912109375, "std": 631.7845458984375, "min": -1452.9244384765625, "p10": -97.35993881225583, "median": 366.6878204345703, "p90": 1332.658288574219, "max": 2417.468505859375, "pos_frac": 0.84375, "sample": [-1452.9244384765625, 2417.468505859375, 1280.563232421875, 1603.3162841796875, 167.70355224609375, 30.611068725585938, 293.5157775878906, 167.2516326904297, 132.27825927734375, -60.777496337890625, 720.837158203125, 308.7969055175781, 563.520751953125, 437.71392822265625, 435.56707763671875, 728.2371215820312, 1006.522705078125, 1391.872314453125, -542.3311157226562, 652.0997314453125, 166.34364318847656, 42.701698303222656, -883.5936279296875, 190.3641357421875, 180.4694366455078, 693.6920166015625, 157.22628784179688, 273.78363037109375, 444.65509033203125, 405.77264404296875, 145.46994018554688, 468.100830078125, 281.4839172363281, -66.41008758544922, 1695.4322509765625, 52.893218994140625, 451.740234375, -584.7122192382812, 231.29786682128906, -369.4532470703125, 414.4211730957031, 189.31265258789062, 373.2763671875, 453.73150634765625, -37.45246124267578, 501.1947021484375, 67.82737731933594, 639.050048828125, 794.651611328125, 971.431884765625, 77.46156311035156, 360.0992736816406, 1354.9847412109375, -110.62416076660156, 1745.396484375, 437.8055419921875, 483.0414123535156, -654.9757080078125, 68.59803771972656, 1525.2301025390625, 19.826950073242188, 964.4496459960938, 436.1697692871094, 388.36669921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000577.npy"}
|
|
{"epoch": 0.8472834067547724, "step": 578, "batch_size": 64, "mean": 445.94696044921875, "std": 558.50341796875, "min": -841.18408203125, "p10": -159.86340026855467, "median": 380.1394500732422, "p90": 1199.9249755859382, "max": 1979.003662109375, "pos_frac": 0.8125, "sample": [1979.003662109375, 196.05105590820312, 790.473876953125, 108.0347900390625, 565.68994140625, 319.49176025390625, 1355.3612060546875, 489.0502014160156, 173.10775756835938, -175.7266082763672, 94.56291961669922, 907.5895385742188, 428.50909423828125, 253.466064453125, 169.84732055664062, -516.6857299804688, 131.19053649902344, 21.537376403808594, 1825.1153564453125, 870.5615234375, 720.1295776367188, 423.28948974609375, 1049.0308837890625, -95.38566589355469, 7.081901550292969, 843.8867797851562, 791.9429931640625, -105.58014678955078, -335.6471252441406, 136.49588012695312, -27.696701049804688, -426.81103515625, 360.0050048828125, 400.2738952636719, 161.3767852783203, 555.5060424804688, -225.795166015625, 265.83154296875, -841.18408203125, -131.27557373046875, 533.907470703125, 729.3858642578125, 473.73919677734375, 453.1185607910156, 1264.5938720703125, 941.7138671875, 298.4980163574219, 104.05093383789062, 839.4359130859375, 169.5694122314453, 809.067138671875, 842.8860473632812, 75.14305114746094, 1320.4613037109375, 845.3843994140625, 1671.4742431640625, -168.24301147460938, 301.1112060546875, 1477.373046875, 999.746826171875, 547.090087890625, -140.31097412109375, 95.40986633300781, 544.2922973632812], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000578.npy"}
|
|
{"epoch": 0.8487518355359766, "step": 579, "batch_size": 64, "mean": 268.92694091796875, "std": 599.6517333984375, "min": -1646.5748291015625, "p10": -428.4040008544921, "median": 297.35520935058594, "p90": 1009.3469482421879, "max": 2155.751953125, "pos_frac": 0.703125, "sample": [5.945762634277344, -157.0176239013672, -1646.5748291015625, 655.760009765625, -67.83059692382812, 132.76034545898438, 849.9083862304688, 326.7603454589844, 681.1786499023438, 400.8255615234375, -12.812446594238281, -140.3833465576172, -749.4246215820312, 446.69012451171875, 281.6227111816406, 639.7239990234375, 280.84698486328125, 499.50030517578125, 1099.477783203125, 265.67333984375, 1042.0567626953125, 94.62385559082031, 372.4610900878906, 507.9697265625, 2.866567611694336, 499.0860595703125, 362.16192626953125, 116.41373443603516, 565.935791015625, 260.55401611328125, 525.0176391601562, -473.97418212890625, 648.3856811523438, 678.7607421875, 2155.751953125, 1425.111083984375, 393.11572265625, -762.7191772460938, -56.95705032348633, 496.6056823730469, 1164.0098876953125, -322.0735778808594, 729.6248168945312, -290.76922607421875, 1274.8951416015625, -82.02229309082031, -28.899799346923828, -219.28968811035156, -631.258056640625, 304.27423095703125, 933.0240478515625, 492.3488464355469, -82.20674133300781, 562.8195190429688, 359.9783630371094, -831.4539794921875, 1086.4129638671875, 62.1593017578125, 263.41534423828125, -848.9281616210938, 198.19163513183594, -212.9134521484375, 290.4361877441406, 393.69061279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000579.npy"}
|
|
{"epoch": 0.8502202643171806, "step": 580, "batch_size": 64, "mean": 503.0469055175781, "std": 693.0075073242188, "min": -1155.6800537109375, "p10": -94.24470062255858, "median": 340.78623962402344, "p90": 1265.5536376953125, "max": 2954.265380859375, "pos_frac": 0.828125, "sample": [-244.4249725341797, 1020.7371826171875, 302.07659912109375, 575.589111328125, -133.55279541015625, 194.16705322265625, 192.75401306152344, 2044.216064453125, 444.5812072753906, 268.38800048828125, 447.25164794921875, -79.240234375, 2954.265380859375, 193.54263305664062, 63.198978424072266, 690.9871215820312, 372.97406005859375, 96.48698425292969, 747.2269897460938, 554.5125732421875, 267.5425720214844, 1163.3719482421875, 1067.6842041015625, 633.605712890625, 544.9891357421875, 12.085456848144531, 1103.8895263671875, 1265.519287109375, 588.1012573242188, 146.64871215820312, 57.609031677246094, 158.35081481933594, 55.152305603027344, 97.81288146972656, 800.2908935546875, 1265.568359375, 48.76470947265625, -111.04325103759766, 934.2096557617188, 776.7854614257812, 1915.7325439453125, 10.298322677612305, -1155.6800537109375, 986.738525390625, -56.67975616455078, 772.6483764648438, -364.6560974121094, -100.67518615722656, 622.9638671875, 2734.79541015625, 0.7783985137939453, -53.173431396484375, 1430.1357421875, 105.17129516601562, 356.58001708984375, 412.2837829589844, -5.217079162597656, 337.0676574707031, 33.57793426513672, -359.5983581542969, 344.50482177734375, 959.8612670898438, 1354.742919921875, 330.122802734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000580.npy"}
|
|
{"epoch": 0.8516886930983847, "step": 581, "batch_size": 64, "mean": 353.0070495605469, "std": 632.11962890625, "min": -901.1304321289062, "p10": -459.7761596679687, "median": 336.74456787109375, "p90": 976.6237304687502, "max": 2950.917724609375, "pos_frac": 0.734375, "sample": [184.85494995117188, 104.01484680175781, 110.71713256835938, -98.58909606933594, 403.94970703125, 453.4881896972656, -441.6654052734375, 157.42962646484375, 925.58251953125, -590.0732421875, 776.5267944335938, 756.234130859375, 505.70245361328125, 372.41778564453125, 661.8291015625, -523.139892578125, 1403.912353515625, 879.4139404296875, 522.2703857421875, 1351.0777587890625, -183.97813415527344, 646.0755615234375, 122.54617309570312, 545.068115234375, 591.5284423828125, 579.5150146484375, 297.55078125, 204.17767333984375, 55.853515625, 998.49853515625, -314.379638671875, 2950.917724609375, -31.99148941040039, 1570.51611328125, 265.36328125, 451.98687744140625, 892.239501953125, -183.7192840576172, 73.1631851196289, 472.27972412109375, 334.18621826171875, 122.66891479492188, -70.0772705078125, -305.1396484375, 1179.305908203125, 242.4610137939453, 644.6657104492188, 897.2222900390625, 1344.1505126953125, 590.088134765625, -616.7476806640625, -901.1304321289062, -12.27316665649414, -559.3189697265625, 728.538330078125, 339.30291748046875, -462.830322265625, -637.029052734375, 112.00970458984375, -452.6497802734375, 380.8357849121094, 866.017822265625, 330.93505859375, 578.09375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000581.npy"}
|
|
{"epoch": 0.8531571218795888, "step": 582, "batch_size": 64, "mean": 392.15032958984375, "std": 563.015869140625, "min": -1041.5322265625, "p10": -204.11291198730467, "median": 312.8669738769531, "p90": 1104.6315673828126, "max": 1737.7562255859375, "pos_frac": 0.71875, "sample": [-1041.5322265625, 238.01596069335938, 1282.6036376953125, 281.6400146484375, 594.06591796875, -175.46047973632812, 292.6583251953125, 469.47308349609375, 452.4276123046875, -36.41733169555664, 458.8416748046875, 114.81914520263672, -38.30348205566406, 796.0736083984375, 820.768798828125, 1462.4500732421875, -58.04328918457031, -109.03941345214844, -292.55096435546875, 1152.51904296875, 425.61614990234375, 61.49214172363281, 573.9824829101562, -618.3802490234375, -272.369384765625, 909.24365234375, 205.56005859375, 1050.0196533203125, 1496.543701171875, 1737.7562255859375, 908.5615844726562, 333.8552551269531, 1055.2310791015625, -82.29337310791016, -312.6998291015625, 189.39938354492188, -16.787498474121094, 167.86354064941406, 620.68505859375, 629.7544555664062, 333.07562255859375, -193.80235290527344, 940.1326293945312, 1092.0257568359375, 0.2979888916015625, 1071.9140625, -0.5343399047851562, 772.3154907226562, -208.53172302246094, 256.38214111328125, -5.398509979248047, 779.794189453125, 1110.0340576171875, 382.16705322265625, -41.27268600463867, -961.2208862304688, 225.75338745117188, 263.5610656738281, 722.312744140625, 656.1356201171875, 1219.14208984375, 220.24085998535156, 679.6287231445312, 55.423194885253906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000582.npy"}
|
|
{"epoch": 0.8546255506607929, "step": 583, "batch_size": 64, "mean": 238.0078582763672, "std": 661.7612915039062, "min": -1336.7723388671875, "p10": -606.3111999511718, "median": 233.1663360595703, "p90": 941.5999694824219, "max": 2076.943603515625, "pos_frac": 0.71875, "sample": [109.26333618164062, 276.58074951171875, 336.3476867675781, -604.2377319335938, 922.77880859375, 47.81910705566406, 937.4224853515625, 288.44268798828125, 2076.943603515625, 165.08624267578125, -196.64947509765625, 57.72090148925781, -962.0894775390625, 367.91680908203125, 181.0342559814453, -242.08871459960938, 856.7924194335938, 933.7696533203125, 324.7459411621094, 1067.4537353515625, 399.197021484375, 66.0381851196289, -573.7160034179688, -398.6871337890625, -105.18354034423828, -916.23193359375, 321.335205078125, 232.0499267578125, -607.1998291015625, -742.7129516601562, -157.01316833496094, -212.69363403320312, 526.8339233398438, 572.63671875, 378.8276062011719, 2001.1141357421875, 429.0114440917969, 12.69476318359375, 943.3903198242188, 226.34097290039062, 1.94256591796875, 357.3329772949219, 354.2511291503906, -102.53091430664062, -1064.560546875, -1336.7723388671875, 1645.338623046875, 440.564453125, -376.1862487792969, 708.3173828125, 933.6756591796875, 1224.9659423828125, 129.826171875, 542.4600830078125, 741.5184326171875, -753.0209350585938, 106.70289611816406, 492.9165954589844, 76.8070068359375, 234.28274536132812, 1194.641845703125, 76.88726806640625, -17.89299774169922, 279.9491271972656], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000583.npy"}
|
|
{"epoch": 0.856093979441997, "step": 584, "batch_size": 64, "mean": 487.2535705566406, "std": 585.6002197265625, "min": -647.8678588867188, "p10": -133.03034057617185, "median": 343.5148468017578, "p90": 1290.496142578125, "max": 2182.425048828125, "pos_frac": 0.796875, "sample": [602.0383911132812, 243.0557403564453, -325.84735107421875, 302.39031982421875, -107.25146484375, -407.3179931640625, 622.2150268554688, 469.64263916015625, 1135.406494140625, 59.99616622924805, -4.300537109375, -144.07843017578125, 116.279541015625, 217.48724365234375, 353.16217041015625, 1118.543212890625, -9.135562896728516, 1256.3846435546875, 924.188232421875, -450.2072448730469, 289.96270751953125, 569.3671875, -5.5470733642578125, 272.8885498046875, 363.086669921875, 172.05606079101562, 1320.22802734375, 183.75674438476562, 210.52542114257812, 228.27731323242188, 1109.98876953125, 570.8839111328125, 301.9228820800781, -647.8678588867188, 730.32177734375, 328.258056640625, 567.893798828125, 553.2552490234375, 337.4483337402344, -6.944061279296875, 481.86505126953125, -319.43011474609375, 349.58135986328125, 1643.2650146484375, 866.4600830078125, 295.64251708984375, 1072.35302734375, 1235.6195068359375, -244.47247314453125, 2182.425048828125, 1879.1805419921875, 29.758224487304688, 1594.329345703125, 1305.1153564453125, 398.57220458984375, 1610.4364013671875, 187.68519592285156, 681.2632446289062, -47.164756774902344, 287.3508605957031, 581.3255004882812, 421.22857666015625, 1110.549560546875, 158.87452697753906], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000584.npy"}
|
|
{"epoch": 0.8575624082232012, "step": 585, "batch_size": 64, "mean": 452.44818115234375, "std": 746.1478881835938, "min": -1875.4425048828125, "p10": -348.4046447753906, "median": 441.4910583496094, "p90": 1375.2043090820318, "max": 2707.573486328125, "pos_frac": 0.734375, "sample": [185.75738525390625, -236.3157196044922, 666.908203125, 301.33056640625, 506.808837890625, 2707.573486328125, 996.4623413085938, -604.22265625, 441.1911315917969, 272.681396484375, 1034.9932861328125, -21.926132202148438, 665.0318603515625, 605.245361328125, -11.313697814941406, 1619.36572265625, 1427.9552001953125, 672.8873291015625, 1529.40673828125, 492.78240966796875, 443.0336608886719, 1252.118896484375, 266.15899658203125, 441.7909851074219, 1089.684326171875, -40.64460754394531, 823.0875854492188, 1100.358642578125, 675.8514404296875, 161.0645751953125, 98.25746154785156, 753.9592895507812, 1457.2451171875, 96.05126953125, 1059.5433349609375, 262.7998046875, 1056.767822265625, -297.2117614746094, -52.09294128417969, 910.8988647460938, 351.8919677734375, -1875.4425048828125, 887.6228637695312, -805.9228515625, 1072.360107421875, 461.80218505859375, 515.9515991210938, -451.31719970703125, 854.9090576171875, -327.6022033691406, -82.29803466796875, -1195.63623046875, -62.01051712036133, -357.3199768066406, 178.19850158691406, 1577.3817138671875, 395.2718505859375, -561.59033203125, 917.3430786132812, -34.2105712890625, 258.681884765625, 132.7008514404297, 168.50782775878906, 2126.085693359375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000585.npy"}
|
|
{"epoch": 0.8590308370044053, "step": 586, "batch_size": 64, "mean": 298.210693359375, "std": 600.3308715820312, "min": -1280.1248779296875, "p10": -377.0206115722656, "median": 319.5325469970703, "p90": 1079.7588745117193, "max": 1570.41455078125, "pos_frac": 0.6875, "sample": [1196.69921875, -260.19573974609375, -262.3681640625, 551.408447265625, 649.0457763671875, 312.60711669921875, -293.5166320800781, 359.156494140625, 381.0548400878906, -51.435035705566406, -763.5840454101562, 326.4579772949219, 765.7117309570312, 1464.5875244140625, -41.708885192871094, -164.8194122314453, 613.271484375, -756.2418212890625, 198.10781860351562, 190.92564392089844, -197.89710998535156, 88.6294174194336, 1177.708251953125, 728.040283203125, 880.3999633789062, -49.33799743652344, 738.6968383789062, -241.6947021484375, 92.71794891357422, 523.772705078125, 756.1524047851562, -181.63021850585938, 212.3394012451172, -1280.1248779296875, 736.8369140625, 724.497314453125, 1476.142333984375, -397.16680908203125, 528.509033203125, -148.28500366210938, 918.5537109375, -489.44964599609375, 472.69586181640625, 446.5489501953125, -1041.1170654296875, -669.0013427734375, 379.4925842285156, 435.11285400390625, 217.43455505371094, 533.1828002929688, -330.0128173828125, 302.76544189453125, 1148.8468017578125, 1534.8275146484375, 899.1328125, 243.43211364746094, 354.681884765625, 1570.41455078125, 120.17445373535156, -78.58252716064453, 188.35894775390625, 781.3316040039062, 426.68450927734375, 136.507080078125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000586.npy"}
|
|
{"epoch": 0.8604992657856094, "step": 587, "batch_size": 64, "mean": 420.68768310546875, "std": 573.68359375, "min": -1273.1795654296875, "p10": -154.7366714477539, "median": 384.8859100341797, "p90": 1244.9475708007815, "max": 2067.340087890625, "pos_frac": 0.765625, "sample": [587.356201171875, 940.435302734375, 405.90106201171875, 130.48681640625, 358.0956115722656, 507.1255798339844, 606.4796142578125, -74.57472229003906, -0.2239227294921875, 1479.4449462890625, -479.0216064453125, -127.68295288085938, 2067.340087890625, 363.2500915527344, 216.6103515625, 514.0966796875, -46.9764404296875, 535.3993530273438, 416.8178405761719, 382.64691162109375, -216.3422393798828, 639.21484375, 1411.256591796875, 19.23479652404785, 403.635986328125, 1197.1280517578125, -255.31434631347656, 996.7789306640625, 778.5142822265625, -6.3985748291015625, 126.43407440185547, 34.92876434326172, -456.91217041015625, 57.698455810546875, 717.9330444335938, 102.03242492675781, 216.00588989257812, -1273.1795654296875, 1003.9844360351562, 129.996337890625, -340.5869140625, 807.6329345703125, 215.35533142089844, 728.1591796875, -110.23789978027344, 1265.441650390625, 716.3204956054688, -145.96226501464844, 199.60696411132812, 370.1244201660156, 505.773681640625, 1647.13525390625, 392.2542724609375, 660.7649536132812, 539.32275390625, 598.0399169921875, 1035.01806640625, -158.49713134765625, 315.24407958984375, 54.217159271240234, 387.1249084472656, -21.85009765625, 1472.5467529296875, 1381.4259033203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000587.npy"}
|
|
{"epoch": 0.8619676945668135, "step": 588, "batch_size": 64, "mean": 353.66156005859375, "std": 525.8469848632812, "min": -974.0598754882812, "p10": -261.6598449707031, "median": 319.6382141113281, "p90": 1057.4864074707034, "max": 1416.2308349609375, "pos_frac": 0.734375, "sample": [192.3203887939453, 592.7849731445312, -217.84725952148438, 142.71893310546875, -14.372940063476562, 654.9677734375, 667.8260498046875, 10.659452438354492, 831.8129272460938, 755.2213134765625, 130.45306396484375, -465.2449951171875, -75.5028076171875, 592.4539184570312, -280.3705139160156, -471.9985656738281, 911.0780029296875, 502.2361755371094, 181.11175537109375, 279.635986328125, 651.895751953125, -139.41604614257812, -113.86161804199219, 1205.0941162109375, 173.43385314941406, 500.403076171875, 1376.9005126953125, -102.915283203125, -447.2550048828125, 120.95430755615234, 330.5023498535156, -50.13246154785156, 295.25286865234375, 1075.4364013671875, 967.8363647460938, 519.5198974609375, 180.11927795410156, -218.00161743164062, 1321.5595703125, 1015.6030883789062, 1249.9793701171875, 780.2433471679688, 90.73709106445312, -597.2705688476562, 660.1234741210938, 555.3759765625, 154.3287811279297, 932.027099609375, -974.0598754882812, 356.34747314453125, -12.135204315185547, 1416.2308349609375, 1187.025390625, 404.47021484375, 491.21124267578125, 628.8068237304688, 643.1632690429688, 308.7740783691406, -155.46934509277344, 174.61956787109375, 59.828880310058594, 340.6006164550781, 913.55615234375, -557.0473022460938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000588.npy"}
|
|
{"epoch": 0.8634361233480177, "step": 589, "batch_size": 64, "mean": 468.15966796875, "std": 589.9996337890625, "min": -880.173583984375, "p10": -215.08692932128903, "median": 435.29966735839844, "p90": 1320.538940429688, "max": 2122.156982421875, "pos_frac": 0.828125, "sample": [1658.906494140625, 272.1529541015625, 348.525390625, 40.16535568237305, 778.445556640625, 167.0176544189453, -138.63758850097656, 599.890625, 381.80914306640625, -233.17510986328125, -16.258773803710938, 462.7256774902344, 61.841636657714844, 274.39404296875, 652.51513671875, 1359.568603515625, 2122.156982421875, 178.93914794921875, 551.1355590820312, 295.2319030761719, 486.1594543457031, 171.1689453125, 159.9732208251953, 1544.8935546875, -29.09368896484375, 484.25048828125, 338.9229431152344, 677.5535278320312, 1684.8232421875, -179.778564453125, 637.3652954101562, 668.5306396484375, 925.3743286132812, 483.4635009765625, 1161.1751708984375, -230.21908569335938, -255.05633544921875, 48.751991271972656, 770.6853637695312, 158.0442352294922, 499.7440490722656, -653.5261840820312, 79.28189086914062, 778.6848754882812, 625.6103515625, 483.8693542480469, 1094.949462890625, 247.4552459716797, 597.412841796875, 1452.9044189453125, 551.4810791015625, 308.9523620605469, 407.8736572265625, -880.173583984375, 248.80406188964844, -723.3213500976562, 802.0732421875, 1561.910888671875, 32.94316101074219, 306.5724182128906, 1229.4697265625, 715.557373046875, -460.8287658691406, 1130.1796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000589.npy"}
|
|
{"epoch": 0.8649045521292217, "step": 590, "batch_size": 64, "mean": 457.78570556640625, "std": 582.0707397460938, "min": -454.372314453125, "p10": -238.45634460449216, "median": 326.36663818359375, "p90": 1287.8748657226565, "max": 2155.22509765625, "pos_frac": 0.734375, "sample": [593.4669189453125, 1578.2584228515625, 878.4427490234375, 309.9553527832031, 1082.9130859375, -365.83062744140625, 94.15106201171875, 2155.22509765625, 847.8182373046875, -184.43881225585938, 590.5, -294.38824462890625, 360.927001953125, 1436.0543212890625, -125.78399658203125, -76.58747863769531, -217.28085327148438, 128.86898803710938, 932.3525390625, 224.07562255859375, 131.41162109375, -325.6878662109375, -83.10067749023438, 296.57666015625, -153.77996826171875, 178.96484375, 732.4431762695312, -368.42608642578125, 1367.046142578125, -102.11214447021484, 982.7103271484375, 497.72283935546875, -12.289833068847656, 1414.781005859375, 257.5098571777344, 414.9232482910156, 330.28619384765625, 633.52978515625, 1217.8597412109375, 580.1300659179688, 479.7681884765625, -247.53155517578125, -94.43527221679688, -454.372314453125, 322.44708251953125, 66.92303466796875, 255.25453186035156, 931.500732421875, 261.28125, -347.4739990234375, 432.94317626953125, 1317.88134765625, 297.56610107421875, -44.73668670654297, 638.0040283203125, 640.0391235351562, 920.0006713867188, 1810.8388671875, 197.29898071289062, 1079.03955078125, 1057.853271484375, 903.0587158203125, 185.2517852783203, 750.686279296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000590.npy"}
|
|
{"epoch": 0.8663729809104258, "step": 591, "batch_size": 64, "mean": 417.65093994140625, "std": 582.2152709960938, "min": -1081.7244873046875, "p10": -377.7824157714843, "median": 432.16905212402344, "p90": 1134.827734375, "max": 1724.590087890625, "pos_frac": 0.84375, "sample": [8.009956359863281, 1134.5718994140625, 1359.617919921875, 652.950927734375, 1015.6839599609375, -116.66026306152344, 44.21397399902344, -1081.7244873046875, 561.0891723632812, 1724.590087890625, 1004.9178466796875, 176.5113983154297, 716.5609130859375, 395.8931884765625, 72.63845825195312, 1631.994384765625, 826.946044921875, 598.805908203125, 129.28695678710938, 245.95574951171875, -5.6857452392578125, 933.3368530273438, 136.5625762939453, 96.72319793701172, 585.1815795898438, 147.45828247070312, -728.4397583007812, 660.01953125, 561.0390625, 435.87725830078125, -306.03057861328125, 166.44091796875, 431.08001708984375, -714.8334350585938, 1166.7333984375, 174.56651306152344, 176.88296508789062, 127.968994140625, 659.3817749023438, -432.494140625, -408.533203125, 842.864501953125, 117.47090148925781, 433.2580871582031, -593.8621215820312, 1065.641357421875, 806.3736572265625, 96.72955322265625, 655.6398315429688, 1075.82666015625, 44.851280212402344, 1134.9373779296875, 1010.8515014648438, 140.6875, 1421.1328125, 459.63507080078125, 486.7720642089844, 1268.40234375, -700.7681274414062, 657.6240844726562, 167.0007781982422, 558.3663330078125, 334.78192138671875, 280.3529357910156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000591.npy"}
|
|
{"epoch": 0.8678414096916299, "step": 592, "batch_size": 64, "mean": 379.54296875, "std": 721.2313232421875, "min": -1352.449462890625, "p10": -556.92373046875, "median": 383.8751525878906, "p90": 1228.0900634765626, "max": 2063.617431640625, "pos_frac": 0.765625, "sample": [372.8094482421875, 1123.708740234375, -61.930633544921875, 458.59088134765625, -899.839111328125, 102.78395080566406, 503.2274475097656, 996.920654296875, 1785.061767578125, 1514.098388671875, -944.9180297851562, 2063.617431640625, 252.09915161132812, 854.39501953125, -1023.5765380859375, -551.8650512695312, -176.031982421875, 338.8658142089844, 130.96441650390625, 1258.032470703125, 117.60419464111328, 1082.9810791015625, 81.53128814697266, 1141.50390625, 443.3526306152344, 103.5950927734375, 206.64450073242188, 959.383056640625, 1613.202880859375, 556.4290161132812, 125.586181640625, -279.96990966796875, 208.86032104492188, 747.863525390625, 1197.650146484375, 305.00067138671875, -559.0917358398438, 379.9553527832031, -434.902587890625, 610.5882568359375, 519.9592895507812, -95.2370376586914, 740.3555908203125, 1609.1591796875, 324.8585205078125, -348.5829162597656, 531.419921875, -1352.449462890625, 873.994873046875, 387.7949523925781, 717.68896484375, 93.15541076660156, 796.0214233398438, 562.5377197265625, -891.438720703125, 961.5790405273438, 686.79541015625, 121.183349609375, 1241.1357421875, -254.44949340820312, -1176.4217529296875, 787.58203125, 723.9195556640625, 25.40778160095215], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000592.npy"}
|
|
{"epoch": 0.869309838472834, "step": 593, "batch_size": 64, "mean": 292.6455078125, "std": 635.1069946289062, "min": -1020.4239501953125, "p10": -473.6730285644531, "median": 191.64047241210938, "p90": 1237.0400756835938, "max": 1809.688232421875, "pos_frac": 0.65625, "sample": [-20.490966796875, 721.5218505859375, -278.78204345703125, 105.03228759765625, -626.57666015625, -494.72296142578125, 1530.9227294921875, -537.826416015625, 1237.503662109375, 604.0732421875, -212.749755859375, 1465.8995361328125, 1357.226318359375, 1034.69384765625, 28.94853973388672, -844.4835815429688, 1258.147216796875, 1605.5340576171875, 773.750732421875, 118.99443054199219, -424.5565185546875, 180.14141845703125, 368.9658203125, 78.2035903930664, 57.16666793823242, 1809.688232421875, 516.2894897460938, 131.3280029296875, 373.9190673828125, -856.5426635742188, 3.712331771850586, 544.865966796875, -333.4726867675781, 203.1395263671875, 719.7118530273438, 816.03125, -128.35031127929688, 1235.9583740234375, -245.53988647460938, 391.252197265625, -863.4348754882812, 658.8230590820312, -313.7656555175781, -1020.4239501953125, -19.911771774291992, -121.3004150390625, 5.194421768188477, -6.168159484863281, 506.7860412597656, 822.3096923828125, 527.5746459960938, 409.1116027832031, 893.2279052734375, 464.294677734375, 203.50469970703125, 837.7940673828125, -4.838653564453125, 40.76165008544922, -136.94354248046875, 652.4078369140625, -82.7076416015625, 467.68798828125, -207.3360595703125, 748.137939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000593.npy"}
|
|
{"epoch": 0.8707782672540382, "step": 594, "batch_size": 64, "mean": 367.7850036621094, "std": 616.4564819335938, "min": -910.6281127929688, "p10": -411.0436370849609, "median": 317.8326721191406, "p90": 1196.2281494140625, "max": 1777.448486328125, "pos_frac": 0.734375, "sample": [1017.4017333984375, -59.595855712890625, 175.14576721191406, 69.14517211914062, -57.42686462402344, -581.5057983398438, 600.0457153320312, 428.33831787109375, 80.08695220947266, 663.8448486328125, 547.1144409179688, 372.0157165527344, 1777.448486328125, 735.939208984375, 141.55581665039062, 947.939453125, 729.2222290039062, 244.4634552001953, 1135.4344482421875, -465.2552795410156, -574.6304321289062, 1506.6859130859375, -910.6281127929688, 1766.1566162109375, -201.4341583251953, 579.5645751953125, -82.01387786865234, 353.3313903808594, 473.4219665527344, -147.64566040039062, 102.00019836425781, 428.5030517578125, 1367.864013671875, 556.5448608398438, 314.0784912109375, 570.4225463867188, 649.12890625, 29.04336929321289, 219.76370239257812, -743.1791381835938, -111.76596069335938, 944.0581665039062, 168.4257049560547, -260.4160461425781, 1181.7342529296875, 919.30126953125, 908.3939208984375, 593.2127075195312, -425.0493469238281, 476.216796875, -879.76318359375, 41.03038024902344, 252.45416259765625, -196.583251953125, 321.58685302734375, -42.49137496948242, 1202.4398193359375, 892.04541015625, 1605.958251953125, 214.79173278808594, 1209.606201171875, -378.3636474609375, 84.13446044921875, 58.946495056152344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000594.npy"}
|
|
{"epoch": 0.8722466960352423, "step": 595, "batch_size": 64, "mean": 404.41473388671875, "std": 577.3847045898438, "min": -541.853515625, "p10": -192.28812866210936, "median": 275.3995056152344, "p90": 1043.8217773437502, "max": 2623.65673828125, "pos_frac": 0.78125, "sample": [953.302490234375, 296.90753173828125, 602.3716430664062, 20.131261825561523, -5.921363830566406, 1061.57861328125, 370.90985107421875, 942.2910766601562, -272.145751953125, 284.12847900390625, 1552.9403076171875, 93.99537658691406, 1070.237060546875, 994.39453125, -203.35023498535156, 347.67962646484375, 336.5187683105469, 1798.876220703125, 232.90968322753906, -157.19509887695312, 115.900146484375, -239.39166259765625, 587.962646484375, 378.5167541503906, -271.550048828125, -256.8160705566406, 255.28842163085938, 109.89969635009766, 237.46241760253906, 296.6717224121094, 151.14080810546875, 206.01824951171875, 265.752685546875, 222.07757568359375, 1002.38916015625, 29.06950569152832, 232.11782836914062, 585.4307250976562, 1635.6298828125, 345.3748779296875, -27.110122680664062, -541.853515625, 491.2309875488281, -345.3625183105469, 367.5914001464844, 370.0890808105469, 266.6705322265625, -166.47654724121094, 717.1514892578125, 743.1280517578125, -42.51490020751953, 675.6220092773438, 101.24102020263672, -137.79750061035156, 417.62030029296875, 120.20106506347656, 946.3353881835938, 99.65989685058594, -130.3641357421875, 2.067554473876953, 1772.771240234375, 502.32757568359375, 847.1513061523438, 2623.65673828125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000595.npy"}
|
|
{"epoch": 0.8737151248164464, "step": 596, "batch_size": 64, "mean": 296.4541015625, "std": 617.1494750976562, "min": -1073.6947021484375, "p10": -469.6156677246093, "median": 201.5656280517578, "p90": 1249.7732910156258, "max": 2032.6763916015625, "pos_frac": 0.734375, "sample": [-296.375732421875, 22.75445556640625, 675.090576171875, 55.98075866699219, 849.72998046875, 598.7153930664062, 200.46408081054688, 516.3522338867188, 266.3258972167969, 566.8270263671875, 2032.6763916015625, -66.91171264648438, -567.4591064453125, 464.22625732421875, 1034.71728515625, 1498.1697998046875, 144.98634338378906, 149.0124053955078, 492.6776428222656, 665.5847778320312, -161.79335021972656, 552.7933349609375, 81.15202331542969, 207.70704650878906, -492.6347351074219, 153.8571319580078, 607.8423461914062, 396.29351806640625, 260.73272705078125, 140.7297821044922, -390.26104736328125, 110.67964935302734, -598.20068359375, 1422.368408203125, 60.47850799560547, 1051.248779296875, 1470.409423828125, 568.8892211914062, -23.274477005004883, -87.8838119506836, 202.66717529296875, 548.9676513671875, 141.2019500732422, 355.3854675292969, -218.4068603515625, 336.677978515625, 1334.855224609375, 211.12132263183594, -298.9563293457031, -149.62977600097656, 1819.030517578125, 59.95734405517578, -1073.6947021484375, 184.53176879882812, 346.0568542480469, -648.1614379882812, 151.81227111816406, -630.8915405273438, 695.5557861328125, 57.995086669921875, 1511.018310546875, -415.9045104980469, 426.8732604980469, -609.6782836914062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000596.npy"}
|
|
{"epoch": 0.8751835535976505, "step": 597, "batch_size": 64, "mean": 506.2996826171875, "std": 706.5131225585938, "min": -1208.86181640625, "p10": -139.30214385986326, "median": 409.5483856201172, "p90": 1460.5961669921878, "max": 2420.082763671875, "pos_frac": 0.78125, "sample": [893.646484375, 593.830810546875, 1126.813232421875, 339.2828674316406, 468.82452392578125, 1534.0821533203125, -134.4252471923828, 1029.568603515625, 1485.183349609375, -396.39501953125, -50.331626892089844, -52.31353759765625, 259.1396789550781, 50.36715316772461, 777.82958984375, 108.96024322509766, 1290.513671875, 136.5440216064453, 502.1507873535156, 1754.060302734375, 446.9464111328125, 464.34515380859375, -16.660858154296875, 1046.840087890625, 244.6443328857422, 296.9124450683594, 38.33146286010742, 211.47581481933594, 54.03825378417969, -1040.7568359375, -1208.86181640625, -83.5982666015625, 1330.0147705078125, 411.54327392578125, 557.6253662109375, 2376.988525390625, 1268.1690673828125, -54.79209899902344, -442.5210266113281, 69.11266326904297, 695.8720092773438, 1289.7216796875, 87.6712417602539, 270.32861328125, 407.5534973144531, -141.39224243164062, 623.0213012695312, 960.241455078125, 61.94615173339844, 1403.22607421875, 173.76629638671875, -276.65093994140625, -235.888916015625, 2420.082763671875, 1886.2109375, 37.43095397949219, 593.603271484375, 483.849365234375, 731.2327880859375, 775.0724487304688, 113.49992370605469, 1576.70751953125, -13.945823669433594, 792.89208984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000597.npy"}
|
|
{"epoch": 0.8766519823788547, "step": 598, "batch_size": 64, "mean": 397.79205322265625, "std": 600.700927734375, "min": -1268.64697265625, "p10": -167.12311859130858, "median": 413.224853515625, "p90": 1153.1157958984377, "max": 2151.881103515625, "pos_frac": 0.796875, "sample": [505.92498779296875, 645.6026611328125, 367.9899597167969, 1050.4078369140625, -1268.64697265625, 384.6061096191406, 1289.9119873046875, 715.7299194335938, 179.43096923828125, 1181.8826904296875, 35.404937744140625, -675.8978271484375, -939.25634765625, 204.57130432128906, 1233.946533203125, -154.6655731201172, 703.43994140625, 736.2399291992188, 1227.7435302734375, 1923.4222412109375, 258.80419921875, 1085.9930419921875, 684.9552001953125, -326.994140625, 372.25482177734375, 460.7839050292969, 540.1650390625, 354.54949951171875, -956.1549682617188, 143.5821990966797, 647.0755615234375, 630.6931762695312, 538.8916625976562, -126.6283950805664, -504.10614013671875, 585.9325561523438, 579.71875, 1195.64697265625, 198.4126434326172, 321.95782470703125, 222.80032348632812, 207.53912353515625, -133.535400390625, 435.2254943847656, -56.134185791015625, 676.0458984375, 470.34307861328125, 482.57257080078125, 21.956342697143555, 110.85208892822266, 391.2242126464844, 2151.881103515625, 823.7029418945312, 825.5302124023438, 511.50262451171875, 285.7261047363281, 30.18212890625, 1006.3673095703125, 574.5138549804688, -172.46206665039062, 13.188522338867188, -132.69622802734375, 716.4052734375, -37.36314010620117], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000598.npy"}
|
|
{"epoch": 0.8781204111600588, "step": 599, "batch_size": 64, "mean": 336.5788879394531, "std": 522.909423828125, "min": -1972.6463623046875, "p10": -138.88133239746088, "median": 301.7605895996094, "p90": 1039.9950500488285, "max": 1639.525634765625, "pos_frac": 0.84375, "sample": [435.2416076660156, 966.5208129882812, 284.79168701171875, 491.77618408203125, 787.8282470703125, 121.27008056640625, 1639.525634765625, 285.02655029296875, 186.0487060546875, 394.7696533203125, 462.606201171875, -702.1502685546875, 6.606121063232422, 94.81317138671875, 12.076528549194336, 355.14892578125, 938.9642333984375, 1071.4840087890625, 518.4083251953125, 1319.3355712890625, 189.95172119140625, 580.0748291015625, 649.5970458984375, 485.2157287597656, 348.6092529296875, -19.416330337524414, 105.90540313720703, 220.18763732910156, 300.2301025390625, -1972.6463623046875, 611.5831298828125, 303.29107666015625, 548.728271484375, 198.30284118652344, 1083.550537109375, 705.1429443359375, 532.0853881835938, -94.69094848632812, 1173.0792236328125, 281.2596740722656, 475.6258544921875, 204.38717651367188, 915.7574462890625, -289.79876708984375, -27.649795532226562, 329.0362854003906, 513.1882934570312, 180.47293090820312, 334.7714538574219, 114.73251342773438, 1114.8568115234375, 252.61827087402344, 108.47364044189453, 15.475822448730469, -314.4839172363281, 10.371395111083984, 281.57611083984375, 396.7861328125, -326.69305419921875, 1216.79150390625, -157.820068359375, -407.64227294921875, 278.8836364746094, 421.1977844238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000599.npy"}
|
|
{"epoch": 0.8795888399412628, "step": 600, "batch_size": 64, "mean": 324.20037841796875, "std": 617.0860595703125, "min": -1090.9415283203125, "p10": -512.173193359375, "median": 224.2927703857422, "p90": 1117.5945434570312, "max": 2093.43896484375, "pos_frac": 0.75, "sample": [124.2890396118164, 668.2526245117188, 621.9989624023438, -1090.9415283203125, 857.1494750976562, 561.8839111328125, -52.227962493896484, 554.4614868164062, -610.7509155273438, 508.9811096191406, 82.17253875732422, 1.3584861755371094, 989.1849365234375, -241.29629516601562, 294.43231201171875, 705.447509765625, -718.0370483398438, 1447.5224609375, 72.6039047241211, 346.43402099609375, 851.0516357421875, 986.6224365234375, -184.7368927001953, 285.311279296875, 32.95033264160156, -349.76885986328125, 742.1693115234375, 1090.3135986328125, 1129.286376953125, 877.7597045898438, 177.26461791992188, -15.680389404296875, 988.3028564453125, 46.37676239013672, 1155.0860595703125, 638.862548828125, 101.76560974121094, 252.61973571777344, -444.68975830078125, 1488.28125, 2093.43896484375, 217.60276794433594, -146.38047790527344, 58.44384002685547, 1447.197998046875, 166.96859741210938, 823.813232421875, 55.2473258972168, -749.3628540039062, 550.675537109375, 70.44038391113281, 1147.999755859375, 251.27239990234375, -60.68292999267578, 230.98277282714844, 828.0333862304688, -702.853759765625, 214.26638793945312, 23.439260482788086, -541.0946655273438, -603.622314453125, 478.236328125, -209.95925903320312, 132.65484619140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000600.npy"}
|
|
{"epoch": 0.8810572687224669, "step": 601, "batch_size": 64, "mean": 411.2587585449219, "std": 545.4044189453125, "min": -1081.9791259765625, "p10": -252.3417724609375, "median": 364.5184783935547, "p90": 1175.9253417968753, "max": 1725.138671875, "pos_frac": 0.78125, "sample": [738.671875, 1216.964599609375, 1725.138671875, 708.6356201171875, -100.35909271240234, 991.6216430664062, 363.4901428222656, 152.4652099609375, -255.64642333984375, 248.00836181640625, -244.63092041015625, -152.6100311279297, 1329.4010009765625, 419.93450927734375, 313.3651428222656, 698.8235473632812, 359.45062255859375, 627.32666015625, -271.8490905761719, 1362.5504150390625, 751.0856323242188, 625.5240478515625, 746.3798217773438, -124.58695983886719, 565.4600830078125, 209.34117126464844, -56.155364990234375, 556.3352661132812, 773.634033203125, 244.83541870117188, 387.0704040527344, 344.76568603515625, -937.39013671875, 662.8922119140625, -376.9301452636719, 535.5676879882812, 142.7657470703125, -62.32981872558594, 151.97756958007812, 271.5646057128906, 644.4591064453125, 792.9754638671875, 96.78063201904297, -1081.9791259765625, 1195.6224365234375, -530.8606567382812, 47.290313720703125, 1383.422607421875, 625.0029907226562, -279.064697265625, 819.8258056640625, 1006.976318359375, 149.47093200683594, 320.4667663574219, 210.14710998535156, 1031.961181640625, 323.5115051269531, 1338.1119384765625, 1129.9654541015625, 441.301025390625, -59.439247131347656, 248.56427001953125, 457.9429931640625, 365.54681396484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000601.npy"}
|
|
{"epoch": 0.882525697503671, "step": 602, "batch_size": 64, "mean": 502.82830810546875, "std": 547.1575317382812, "min": -502.1641540527344, "p10": -70.72713470458984, "median": 414.23191833496094, "p90": 1132.7218994140626, "max": 2308.622802734375, "pos_frac": 0.84375, "sample": [490.5845031738281, 159.12872314453125, -120.47627258300781, 308.3271484375, 44.972267150878906, 154.7093505859375, 976.0250854492188, 687.4931640625, 202.36720275878906, 245.38243103027344, 438.449951171875, 107.86195373535156, 673.0833129882812, 993.655029296875, 1146.9149169921875, 49.54969024658203, 493.599365234375, 443.734619140625, 390.0138854980469, 1227.117919921875, -17.288177490234375, 222.08184814453125, -35.21728515625, -502.1641540527344, 1099.6048583984375, 735.9557495117188, 344.884521484375, 494.4038391113281, 150.28143310546875, -73.43748474121094, 64.6153793334961, 984.8013916015625, 928.6312866210938, 143.95069885253906, 717.3374633789062, 1248.4324951171875, 253.89083862304688, 496.486328125, -465.4532775878906, -348.48974609375, 822.5199584960938, 1085.0030517578125, 766.1282958984375, 1022.90625, 792.090087890625, 886.9556884765625, 2308.622802734375, -139.2758331298828, 1206.9891357421875, -412.6666259765625, 935.2852783203125, 911.3963623046875, 228.18609619140625, 164.72689819335938, 1364.635498046875, -64.40298461914062, 311.26666259765625, 801.3735961914062, 154.81704711914062, 116.34799194335938, 796.8422241210938, 2063.60205078125, 298.8268127441406, 203.0312957763672], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000602.npy"}
|
|
{"epoch": 0.8839941262848752, "step": 603, "batch_size": 64, "mean": 450.83575439453125, "std": 661.5194091796875, "min": -831.5108032226562, "p10": -364.47956237792965, "median": 362.2799072265625, "p90": 1307.265380859375, "max": 2432.076171875, "pos_frac": 0.734375, "sample": [-388.857666015625, -268.0185241699219, 378.355712890625, 43.220481872558594, 1313.8333740234375, -20.92230224609375, 1221.743896484375, 409.86981201171875, 801.01611328125, 1233.300048828125, -72.02618408203125, 53.739219665527344, -704.1395874023438, 845.8458251953125, 50.90873336791992, 433.2872619628906, 43.153076171875, 1470.7479248046875, 1932.858642578125, 537.4513549804688, 980.2394409179688, 836.9805297851562, 670.8883056640625, 64.62391662597656, 307.198974609375, 279.5506896972656, 456.71844482421875, 156.32669067382812, -59.80996322631836, 261.14434814453125, 1578.509521484375, -831.5108032226562, 1718.358642578125, -144.36929321289062, -9.555702209472656, 346.2041015625, 957.1509399414062, -342.769287109375, 721.1527709960938, 552.4209594726562, 916.1152954101562, -403.06756591796875, -97.00541687011719, -23.94852066040039, -132.74609375, 533.830078125, 724.1527709960938, 116.42350769042969, -373.7839660644531, 418.8948974609375, 489.70843505859375, 1291.9400634765625, 890.6121826171875, -431.12506103515625, 246.4976806640625, -399.1663818359375, 2432.076171875, 328.2256164550781, 1687.101806640625, 36.74420166015625, 826.7662353515625, 1075.35595703125, 885.0625610351562, 0.003849029541015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000603.npy"}
|
|
{"epoch": 0.8854625550660793, "step": 604, "batch_size": 64, "mean": 501.3348693847656, "std": 690.4627685546875, "min": -836.5044555664062, "p10": -264.22925720214846, "median": 433.0321960449219, "p90": 1590.6401489257814, "max": 2377.7607421875, "pos_frac": 0.734375, "sample": [-383.8493957519531, 192.1807861328125, -264.9084777832031, -149.2384490966797, 672.0595092773438, 981.5582275390625, 956.4500732421875, 1100.8385009765625, 893.2445678710938, 2310.53857421875, -836.5044555664062, 973.9596557617188, -47.78746032714844, -236.08157348632812, 907.727294921875, 1086.841552734375, 888.51123046875, 115.27340698242188, 396.3074951171875, 994.2548217773438, 177.35775756835938, -490.43121337890625, 116.98054504394531, 1624.8983154296875, 588.154296875, -693.3535766601562, 1728.421875, -262.6444091796875, 133.78256225585938, 212.454833984375, 7.366371154785156, 391.0645446777344, 566.4827880859375, 481.2610778808594, -78.34805297851562, 1667.3785400390625, 639.8219604492188, 35.46497344970703, -225.07945251464844, 843.85400390625, 584.9415283203125, 414.9312438964844, 509.8826599121094, 259.40673828125, 416.1139221191406, 2377.7607421875, -477.41595458984375, 1573.4697265625, 449.9504699707031, -56.92192077636719, 606.8930053710938, 1107.23876953125, 819.3297119140625, -77.93055725097656, 1862.2720947265625, 531.8185424804688, -373.261474609375, -74.96499633789062, 359.6096496582031, 714.1862182617188, -42.58868408203125, 588.01611328125, 1597.9989013671875, 398.4314880371094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000604.npy"}
|
|
{"epoch": 0.8869309838472834, "step": 605, "batch_size": 64, "mean": 365.2569885253906, "std": 620.79296875, "min": -1035.4881591796875, "p10": -287.6330841064453, "median": 256.8729934692383, "p90": 1082.4372436523443, "max": 2735.85302734375, "pos_frac": 0.75, "sample": [-57.23228454589844, -335.63006591796875, -22.6514892578125, 216.251953125, 71.0066146850586, 90.45259094238281, 845.8928833007812, 679.9901733398438, 882.27880859375, 242.7049560546875, 672.8538208007812, -56.653778076171875, 73.42962646484375, -306.106201171875, 440.3497009277344, 466.627197265625, 135.0741424560547, 824.8309936523438, 1550.9564208984375, -190.7967071533203, 2735.85302734375, 33.466033935546875, 589.87255859375, -29.455900192260742, 211.3767547607422, 965.0234375, -145.30511474609375, -292.6588439941406, 358.73468017578125, 329.580810546875, 77.11522674560547, -275.90631103515625, 879.4883422851562, -492.5296936035156, -169.94732666015625, 420.3753356933594, 1187.1771240234375, 88.78316497802734, 258.91265869140625, 750.1544189453125, 102.58372497558594, 2305.0693359375, -735.621337890625, -1035.4881591796875, 503.43817138671875, 426.4117431640625, -309.50616455078125, 1147.3304443359375, 93.65576934814453, 6.30925178527832, 497.1920471191406, 346.41607666015625, 1230.9727783203125, 244.93167114257812, 240.48741149902344, 317.29010009765625, 440.7279357910156, 1132.7574462890625, 452.9875183105469, 254.8333282470703, 950.65966796875, 376.9054870605469, -147.63775634765625, 830.0008544921875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000605.npy"}
|
|
{"epoch": 0.8883994126284875, "step": 606, "batch_size": 64, "mean": 401.52313232421875, "std": 565.43896484375, "min": -569.9078369140625, "p10": -335.14705505371086, "median": 301.57373046875, "p90": 1083.7690063476566, "max": 2078.86962890625, "pos_frac": 0.765625, "sample": [-499.17071533203125, 113.17623901367188, 110.0853500366211, -358.5435485839844, 364.0718078613281, 1001.5360107421875, 239.12399291992188, 201.346923828125, 1160.3226318359375, 808.8602905273438, 176.4040069580078, 2078.86962890625, 890.35498046875, -85.6390380859375, -447.8006896972656, 232.90838623046875, 519.6370849609375, 247.53604125976562, 330.7175598144531, 1326.1552734375, 650.84912109375, -448.5374755859375, 1638.9859619140625, 21.691795349121094, 985.1436767578125, 625.7420654296875, 557.7347412109375, 174.05177307128906, -102.4946060180664, -280.55523681640625, 539.5078735351562, 715.75439453125, -418.6226806640625, 276.47210693359375, -499.3150329589844, 322.18389892578125, 200.10357666015625, 746.5303344726562, -115.89884185791016, -102.06184387207031, -569.9078369140625, -116.45417022705078, 1889.508544921875, 1119.01171875, 237.66403198242188, 656.658935546875, 36.18451690673828, 608.2791748046875, 872.911865234375, 356.21759033203125, -73.92668151855469, 842.3983154296875, -61.22352600097656, 244.25233459472656, 904.6841430664062, 1444.763671875, 272.3145751953125, 700.3281860351562, 26.38372039794922, 280.96356201171875, 609.799072265625, 580.0504760742188, 560.199951171875, 379.2005920410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000606.npy"}
|
|
{"epoch": 0.8898678414096917, "step": 607, "batch_size": 64, "mean": 342.0454406738281, "std": 577.7899169921875, "min": -1162.990478515625, "p10": -234.09727478027344, "median": 310.7872619628906, "p90": 998.582232666016, "max": 2503.743896484375, "pos_frac": 0.796875, "sample": [474.059326171875, -104.46229553222656, 517.6988525390625, 242.56358337402344, 134.734619140625, 170.95814514160156, 1136.4833984375, 350.3511962890625, 2.556396484375, 1243.645751953125, -309.380615234375, 648.8860473632812, 442.3092346191406, -235.29721069335938, 100.93222045898438, 1039.48779296875, -614.3320922851562, 185.88031005859375, 308.0406494140625, 89.56781768798828, -172.7587890625, 146.7886505126953, 335.4808654785156, 482.4007568359375, -452.2463684082031, 609.1273803710938, -166.81951904296875, 1537.8538818359375, 735.2274169921875, 58.71918869018555, 641.8756103515625, 112.67530059814453, 735.0079956054688, 313.53387451171875, 495.56634521484375, 333.509521484375, 903.1359252929688, 18.096923828125, 1501.104736328125, 387.5562438964844, -988.7169189453125, 581.5763549804688, -231.29742431640625, 511.7252197265625, 734.4823608398438, 92.30810546875, -5.475730895996094, 156.75262451171875, -1162.990478515625, 260.71026611328125, 528.436767578125, 363.0108947753906, -177.94168090820312, 619.332763671875, 246.58273315429688, 78.51930236816406, 211.3023681640625, 2503.743896484375, 1493.8924560546875, 726.216552734375, 303.3755187988281, -370.25982666015625, 571.7153930664062, 463.3869934082031], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000607.npy"}
|
|
{"epoch": 0.8913362701908958, "step": 608, "batch_size": 64, "mean": 400.59295654296875, "std": 503.2538146972656, "min": -489.6466369628906, "p10": -192.36787414550776, "median": 411.6615447998047, "p90": 943.5206481933595, "max": 1763.410888671875, "pos_frac": 0.78125, "sample": [732.4363403320312, 792.90966796875, 369.5085144042969, 529.8427734375, -489.6466369628906, 1574.451171875, -352.5489196777344, 150.8716583251953, -133.6028594970703, 669.3125, 770.9982299804688, 0.6926097869873047, 7.569068908691406, 694.7987670898438, -138.6513671875, 161.87530517578125, -440.6174011230469, 443.5408935546875, -100.92927551269531, 326.9819030761719, 498.1739501953125, 682.5678100585938, 87.81706237792969, 744.9830322265625, 101.7397232055664, 431.09344482421875, 697.2303466796875, 468.7112121582031, 957.4470825195312, -140.5523223876953, 58.347713470458984, -47.17436218261719, 250.96958923339844, 392.2296447753906, 700.7119140625, 272.19317626953125, 456.20843505859375, 465.661376953125, 1201.694580078125, -354.87310791015625, 1739.4039306640625, 253.6502685546875, 642.471923828125, 1099.9442138671875, 1240.3641357421875, 166.22576904296875, 84.6270751953125, 812.52099609375, -49.381752014160156, -214.5745391845703, 662.6527099609375, -1.0912399291992188, 549.6829833984375, 783.7323608398438, -302.3079528808594, 911.025634765625, -428.2005310058594, 66.79385375976562, 1763.410888671875, 745.0848999023438, 786.677001953125, 473.15008544921875, 235.52719116210938, 121.584716796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000608.npy"}
|
|
{"epoch": 0.8928046989720999, "step": 609, "batch_size": 64, "mean": 506.5679016113281, "std": 787.3883056640625, "min": -1195.086181640625, "p10": -276.63756866455066, "median": 431.0681610107422, "p90": 1230.3578369140628, "max": 3893.398193359375, "pos_frac": 0.796875, "sample": [133.62387084960938, 53.46540069580078, 109.09008026123047, -669.1119384765625, 222.81382751464844, 439.2563171386719, 1256.990234375, 525.6204223632812, 610.5444946289062, -404.53961181640625, 791.3964233398438, 1712.9637451171875, 104.45106506347656, 439.2138671875, 822.365478515625, -1195.086181640625, -131.45388793945312, 536.9012451171875, 922.9047241210938, 422.9224548339844, 775.8509521484375, -87.8424072265625, 717.9173583984375, 706.7376098632812, -379.7940673828125, 596.9168701171875, 1322.5006103515625, 235.41909790039062, -501.4877014160156, 682.9729614257812, 1609.234130859375, 708.6707763671875, 2484.779541015625, 517.6950073242188, 2741.432373046875, 985.9937744140625, -49.16132354736328, 741.34375, -78.00926971435547, 311.0040588378906, 306.9465637207031, 175.33973693847656, 825.9443359375, 3893.398193359375, 692.9315185546875, 99.70452880859375, 384.1425476074219, -137.61082458496094, -743.2090454101562, 180.47080993652344, 299.54119873046875, 294.6091613769531, 1082.6998291015625, 21.177993774414062, -3.8038253784179688, 1082.5458984375, 295.6608581542969, 671.7144165039062, 634.714111328125, 1168.215576171875, 260.8251953125, 490.39825439453125, -336.220458984375, 33.7038459777832], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000609.npy"}
|
|
{"epoch": 0.8942731277533039, "step": 610, "batch_size": 64, "mean": 320.3234558105469, "std": 583.2960815429688, "min": -1910.7852783203125, "p10": -386.4082977294922, "median": 288.99049377441406, "p90": 903.921667480469, "max": 1747.5306396484375, "pos_frac": 0.765625, "sample": [635.81298828125, 696.453369140625, 218.36854553222656, 189.2549285888672, -392.15234375, 725.2078857421875, 78.14106750488281, 597.1239013671875, 548.2081909179688, -503.13214111328125, 19.03339385986328, 530.5128173828125, -11.607109069824219, 1306.714599609375, 730.1431884765625, 382.0540771484375, 249.22042846679688, 188.894287109375, -566.292724609375, -1910.7852783203125, 577.3522338867188, 1747.5306396484375, 182.9849853515625, 1042.32470703125, -186.9803466796875, 371.424072265625, 175.59292602539062, -403.23663330078125, 704.4076538085938, 704.44873046875, 1560.6236572265625, 10.089065551757812, -1.796142578125, 713.3140869140625, 343.73223876953125, 1455.0706787109375, -373.0055236816406, 203.67092895507812, 328.5577087402344, 612.0339965820312, 666.4228515625, 417.14068603515625, -54.36823654174805, 743.7675170898438, 134.38876342773438, 227.85580444335938, 929.0647583007812, 136.88717651367188, -98.5546875, 288.40576171875, 34.92506408691406, 704.63720703125, 244.2660369873047, -95.58909606933594, 627.63525390625, 78.4269790649414, 342.1847229003906, 363.36016845703125, -159.94850158691406, 289.5752258300781, -459.4740905761719, -715.8232421875, 845.2544555664062, 1530.947265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000610.npy"}
|
|
{"epoch": 0.895741556534508, "step": 611, "batch_size": 64, "mean": 539.90380859375, "std": 750.0967407226562, "min": -786.6029052734375, "p10": -278.99327392578124, "median": 428.5040283203125, "p90": 1434.107958984375, "max": 2637.351806640625, "pos_frac": 0.765625, "sample": [1216.6851806640625, 424.1744689941406, 2585.72021484375, 1654.0096435546875, 1158.8975830078125, 567.433349609375, -68.53357696533203, 1324.323486328125, 34.350730895996094, 264.7084045410156, 319.8925476074219, 748.8709106445312, 16.479290008544922, -141.0272674560547, -774.497314453125, -283.5169677734375, -398.5435791015625, -786.6029052734375, 234.52822875976562, 1414.9671630859375, -711.6329956054688, 1248.72314453125, 2300.783935546875, -443.684814453125, 1442.3111572265625, 584.8079223632812, 54.481422424316406, 1054.5909423828125, 573.6869506835938, 152.7568817138672, -39.5628662109375, 1115.0626220703125, 429.87640380859375, -39.101234436035156, 427.13165283203125, 1114.4359130859375, 69.69256591796875, 171.7734375, -191.38833618164062, 281.4446105957031, 974.0927124023438, 357.1716613769531, 3.6814117431640625, 1752.3702392578125, 438.10382080078125, 499.39801025390625, -300.3877868652344, 544.30419921875, 1084.0419921875, -117.00394439697266, 639.117431640625, -268.43798828125, 245.33837890625, 496.8497314453125, 774.483154296875, 510.74468994140625, 358.8536682128906, 1713.5860595703125, 480.32000732421875, 1406.3597412109375, 1181.159912109375, -4.600837707519531, 2637.351806640625, 38.43734359741211], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000611.npy"}
|
|
{"epoch": 0.8972099853157122, "step": 612, "batch_size": 64, "mean": 408.61627197265625, "std": 560.021484375, "min": -818.4468383789062, "p10": -241.06194000244136, "median": 343.74322509765625, "p90": 1019.7358337402344, "max": 2348.142333984375, "pos_frac": 0.796875, "sample": [997.4669799804688, 270.1341857910156, 334.7679748535156, -414.3048400878906, -93.8505859375, 3.7978763580322266, 997.3853149414062, 289.1054992675781, 414.95501708984375, 432.53826904296875, -621.86962890625, 830.2635498046875, 126.51736450195312, 1014.75341796875, 851.95166015625, 1021.8711547851562, 592.161865234375, 259.83740234375, 522.1005249023438, 1188.16455078125, 855.5201416015625, 92.42630767822266, -818.4468383789062, 1277.0406494140625, 735.5875854492188, -255.457275390625, 799.3256225585938, -305.30010986328125, 583.6823120117188, 749.3851318359375, 540.6344604492188, -793.4696044921875, 136.32289123535156, 352.7184753417969, 174.67388916015625, -327.6884765625, 621.7327270507812, 401.6341857910156, 113.822998046875, -194.20166015625, 137.17701721191406, 174.0575714111328, 143.25509643554688, 193.66232299804688, 2348.142333984375, 39.439762115478516, 791.3021240234375, 985.2260131835938, 1203.6552734375, 240.82861328125, 130.2857208251953, 731.95947265625, 1388.7078857421875, 141.52145385742188, -47.70085144042969, -207.4728240966797, 526.7369384765625, -4.9988861083984375, 931.4917602539062, -157.91136169433594, 1123.7667236328125, 592.4022216796875, 904.8382568359375, 83.377197265625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000612.npy"}
|
|
{"epoch": 0.8986784140969163, "step": 613, "batch_size": 64, "mean": 507.1754150390625, "std": 609.2411499023438, "min": -860.1251220703125, "p10": -38.087606811523415, "median": 449.3294982910156, "p90": 1239.3911132812502, "max": 2455.6318359375, "pos_frac": 0.84375, "sample": [594.0606689453125, 1260.07177734375, 1296.3511962890625, -152.0593719482422, 1079.9168701171875, 1191.13623046875, 28.214069366455078, 1076.1204833984375, 935.1968994140625, 23.90171241760254, 1499.2196044921875, 200.69985961914062, 618.2977905273438, 288.36712646484375, 1618.985595703125, -342.74932861328125, 5.4236907958984375, 400.54608154296875, 455.09210205078125, -47.33174133300781, 377.6143798828125, 95.97570037841797, 690.6640014648438, 665.51953125, 1120.9765625, 300.7166748046875, 1112.224609375, 1028.723388671875, -5.99066162109375, 575.0891723632812, 766.43994140625, 21.727630615234375, 10.711858749389648, 656.0686645507812, 443.56689453125, 1774.360107421875, 676.2818603515625, 705.7840576171875, 2455.6318359375, 3.838184356689453, -845.4503784179688, 1120.9405517578125, 682.2012329101562, 857.02294921875, 690.713134765625, -860.1251220703125, 127.84956359863281, 1506.33935546875, -1.0611610412597656, 238.9654541015625, 854.4072265625, -271.561767578125, -445.4597473144531, 169.67431640625, 1013.8145751953125, 533.8615112304688, 162.1612548828125, 343.8906555175781, 52.50102996826172, 641.1845703125, -16.517959594726562, 79.1946792602539, 223.65963745117188, 95.63581848144531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000613.npy"}
|
|
{"epoch": 0.9001468428781204, "step": 614, "batch_size": 64, "mean": 489.30645751953125, "std": 681.6434326171875, "min": -887.33056640625, "p10": -244.16184387207022, "median": 465.4331970214844, "p90": 1329.2904174804687, "max": 2588.90185546875, "pos_frac": 0.75, "sample": [207.60525512695312, 421.43182373046875, -590.4451904296875, 941.5919189453125, -110.69786071777344, 1319.906982421875, 21.939212799072266, 404.2627868652344, 550.0628662109375, -20.09023666381836, 1290.034912109375, 194.4571075439453, 516.851318359375, 887.9043579101562, 283.90032958984375, -167.87557983398438, 1389.366455078125, 742.312255859375, 148.3512420654297, 1.6247367858886719, 1083.598388671875, -16.873291015625, 709.8820190429688, 1001.888671875, -567.4324340820312, -113.45838165283203, 509.4345703125, -134.66970825195312, 614.7520751953125, -276.85595703125, 575.8195190429688, 1764.68603515625, -158.60955810546875, 804.9462280273438, -583.9814453125, -157.02764892578125, 719.0694580078125, -698.3860473632812, -5.98805046081543, 726.82861328125, 1561.355712890625, 389.5596008300781, 1597.227294921875, 311.9756164550781, 1066.2601318359375, 1091.9749755859375, 1282.358154296875, 309.385009765625, 660.9852905273438, 723.6709594726562, 3.7560043334960938, 333.1338806152344, 1333.3118896484375, 103.03155517578125, 656.1838989257812, 2588.90185546875, 1844.0360107421875, 121.59918212890625, -652.7322998046875, -887.33056640625, 1193.1990966796875, 157.70166015625, 528.5172119140625, 767.433349609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000614.npy"}
|
|
{"epoch": 0.9016152716593245, "step": 615, "batch_size": 64, "mean": 411.6612854003906, "std": 655.2713623046875, "min": -1777.503173828125, "p10": -238.64943695068357, "median": 268.02159118652344, "p90": 1090.75703125, "max": 2531.9345703125, "pos_frac": 0.828125, "sample": [151.34048461914062, 726.9489135742188, 168.39901733398438, 1431.7626953125, 254.40553283691406, 1261.7706298828125, -21.707748413085938, 5.903755187988281, 8.54327392578125, 99.32138061523438, 923.5103759765625, 270.060791015625, 248.44300842285156, -312.883056640625, 715.2901000976562, -504.9622497558594, 999.8927612304688, -1777.503173828125, 265.9823913574219, 400.94647216796875, 287.558837890625, 160.771484375, 1959.5391845703125, 143.26123046875, 71.02983856201172, 1015.5159912109375, -212.38656616210938, 449.0341796875, 563.3411865234375, 73.95462036132812, 409.69024658203125, 1017.4242553710938, 650.13427734375, 1061.830078125, 1321.6124267578125, 63.415992736816406, 103.54118347167969, 114.34921264648438, -517.5985107421875, -105.60968780517578, -720.7353515625, 1062.2474365234375, 1094.987060546875, 58.91596984863281, 1643.6085205078125, 534.7306518554688, 565.0786743164062, 1080.886962890625, 158.59283447265625, 460.9177551269531, 960.9036254882812, -249.9049530029297, 224.4571075439453, 10.105682373046875, 756.2160034179688, -386.7135314941406, 217.61529541015625, 433.96466064453125, 504.2506408691406, 815.8069458007812, -177.1867218017578, 2531.9345703125, 684.4345092773438, 135.33412170410156], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000615.npy"}
|
|
{"epoch": 0.9030837004405287, "step": 616, "batch_size": 64, "mean": 334.37677001953125, "std": 578.4204711914062, "min": -823.6965942382812, "p10": -376.19680175781247, "median": 313.39817810058594, "p90": 1135.2195556640627, "max": 1737.42822265625, "pos_frac": 0.734375, "sample": [922.39208984375, 67.40362548828125, 142.88052368164062, 745.5133056640625, 545.6558837890625, -1.971944808959961, 259.3746337890625, -206.53451538085938, 350.5400390625, 1175.499267578125, 1375.1192626953125, 624.6495971679688, 158.04104614257812, 84.44633483886719, -730.3785400390625, 1152.5050048828125, 463.3286437988281, 276.2563171386719, 110.95812225341797, -123.2085189819336, 1496.619873046875, 573.5361328125, 511.9345397949219, 399.2583312988281, -366.4715270996094, -641.6398315429688, 550.7854614257812, 360.29443359375, -79.8786849975586, 1233.4359130859375, 1624.0482177734375, 157.56788635253906, -497.11553955078125, -6.131080627441406, 82.10606384277344, -823.6965942382812, -168.74472045898438, 240.85592651367188, 905.3576049804688, 633.5519409179688, 389.84173583984375, 370.85321044921875, 356.8626708984375, 568.0393676757812, -747.6434936523438, 1002.7584228515625, 142.1366729736328, -335.7039794921875, 135.20452880859375, 773.403564453125, 457.0076904296875, -380.3647766113281, 1737.42822265625, 357.592041015625, 725.49609375, 160.98919677734375, 243.27439880371094, 170.3175811767578, -314.315673828125, 630.6082153320312, 1065.3553466796875, -364.33966064453125, 1094.8868408203125, -417.7200622558594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000616.npy"}
|
|
{"epoch": 0.9045521292217328, "step": 617, "batch_size": 64, "mean": 241.14703369140625, "std": 576.4742431640625, "min": -1234.5537109375, "p10": -407.1669067382812, "median": 236.93768310546875, "p90": 780.1889160156253, "max": 2127.309814453125, "pos_frac": 0.75, "sample": [278.3953857421875, 203.82611083984375, -185.01834106445312, 303.6658935546875, 379.1107482910156, -250.72613525390625, 1.7092227935791016, -627.0670776367188, 659.4664916992188, 185.53416442871094, -286.6872253417969, 378.8561096191406, 369.3089904785156, 439.7112731933594, 207.88534545898438, 242.43075561523438, 475.17779541015625, 155.11680603027344, 63.80085754394531, 1383.23486328125, 669.3270263671875, 574.929443359375, 205.441650390625, 43.93834686279297, 421.4794616699219, 457.4638977050781, 93.0941390991211, -280.0281677246094, 332.1282958984375, -1234.5537109375, 83.48202514648438, 407.0497741699219, -378.02667236328125, -348.4054260253906, 683.3892822265625, 284.941650390625, 1128.109619140625, 811.6265869140625, 557.093505859375, 454.7384033203125, 536.8114013671875, -493.59967041015625, 842.1473999023438, 517.9638061523438, 1933.3126220703125, 440.53387451171875, 706.8343505859375, -647.00390625, 196.97567749023438, 589.3384399414062, -1172.1162109375, 172.95567321777344, 98.03862762451172, 210.21710205078125, 231.44461059570312, -112.82424926757812, 882.1236572265625, -151.8603515625, -34.28997039794922, -419.65557861328125, 2127.309814453125, 310.62628173828125, -778.5784912109375, 101.75479125976562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000617.npy"}
|
|
{"epoch": 0.9060205580029369, "step": 618, "batch_size": 64, "mean": 522.3434448242188, "std": 838.8261108398438, "min": -1727.7322998046875, "p10": -456.5450408935547, "median": 440.16827392578125, "p90": 1321.3440307617188, "max": 3020.472412109375, "pos_frac": 0.765625, "sample": [-1727.7322998046875, 849.544921875, 288.18377685546875, 278.4665832519531, 124.4007339477539, 254.7455291748047, 147.37237548828125, 67.49171447753906, 2392.373291015625, 883.5899047851562, 249.96470642089844, -893.4192504882812, 801.2232055664062, 213.1176300048828, 1243.54833984375, 313.7468566894531, -362.91510009765625, 1208.2171630859375, 720.7251586914062, -391.2889099121094, -183.337646484375, -462.0552062988281, -443.68798828125, 90.43913269042969, -796.9523315429688, -607.3505249023438, 102.29537200927734, 1334.5704345703125, 1032.845458984375, 122.63735961914062, 583.0867919921875, 440.7642822265625, 707.7798461914062, 991.15966796875, 979.6806640625, -928.2387084960938, -467.7392272949219, 1392.669677734375, 342.1920471191406, 528.2789306640625, 897.6275024414062, 1290.482421875, 972.305419921875, 2470.1806640625, 999.1846313476562, -87.19635772705078, 439.572265625, 659.7249145507812, 380.055419921875, 3020.472412109375, 856.5701904296875, -15.269296646118164, 1476.86669921875, 1228.9622802734375, 909.2271728515625, 490.9530029296875, -169.3470916748047, 1190.7579345703125, 2577.331298828125, -61.073394775390625, 427.2431640625, 1065.5706787109375, 599.8961181640625, 389.48870849609375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000618.npy"}
|
|
{"epoch": 0.9074889867841409, "step": 619, "batch_size": 64, "mean": 400.05615234375, "std": 811.0994262695312, "min": -1132.9000244140625, "p10": -549.6720031738281, "median": 303.95091247558594, "p90": 1448.1886352539068, "max": 3109.224609375, "pos_frac": 0.671875, "sample": [-1017.5045166015625, -668.17431640625, 11.0892333984375, 1209.2410888671875, 1516.435791015625, -102.6094741821289, 342.5334167480469, 235.41580200195312, 313.40765380859375, -477.9174499511719, 593.9485473632812, 715.3480224609375, 431.35723876953125, 1783.7529296875, -678.1210327148438, 198.8665771484375, 1197.533935546875, 460.85626220703125, 1163.9422607421875, 1145.4378662109375, -823.524169921875, -733.0370483398438, 1274.3974609375, 1316.20556640625, -20.341079711914062, -481.82391357421875, -103.7030258178711, 558.3375244140625, 665.988037109375, -93.87542724609375, 99.58193969726562, 623.6165161132812, -395.17095947265625, 28.719970703125, 1503.46484375, -379.4341125488281, -57.21539306640625, -187.7476043701172, 798.0177001953125, 653.7457885742188, -53.31777572631836, 1696.59375, 593.1343383789062, 570.0443725585938, 1039.05419921875, 77.48104858398438, -194.8236846923828, 2603.99755859375, 1546.5592041015625, 311.0364990234375, 112.5700454711914, 204.15695190429688, 434.8150939941406, 3109.224609375, -1132.9000244140625, -578.749755859375, 650.2149658203125, -116.7957763671875, 1319.2108154296875, -31.61166763305664, 296.8653259277344, 155.98056030273438, 1.0006542205810547, 368.8121643066406], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000619.npy"}
|
|
{"epoch": 0.908957415565345, "step": 620, "batch_size": 64, "mean": 380.65301513671875, "std": 610.8950805664062, "min": -1066.031982421875, "p10": -257.8278106689453, "median": 354.47142028808594, "p90": 1252.6873291015627, "max": 1934.740234375, "pos_frac": 0.75, "sample": [396.3836364746094, -336.3392333984375, 495.0657958984375, 651.1436767578125, 482.80426025390625, 810.173095703125, 349.47589111328125, 271.08306884765625, -76.91516876220703, 1328.2882080078125, -249.93698120117188, 381.61505126953125, -261.2095947265625, 1206.0518798828125, 452.3660888671875, 141.9721221923828, 1056.737060546875, 537.4061279296875, -88.89224243164062, 1385.6072998046875, 1934.740234375, 208.774658203125, 402.2976989746094, 1097.2777099609375, -500.9939270019531, 197.85238647460938, 670.9050903320312, 255.90823364257812, 222.1776885986328, 82.22782897949219, -35.39379119873047, 1791.5865478515625, 413.72161865234375, 1803.218017578125, -140.0882568359375, 523.6105346679688, 43.703636169433594, 552.37646484375, -48.721012115478516, 403.5787353515625, 376.6315002441406, 463.583251953125, -200.46258544921875, -109.10160827636719, 1272.6739501953125, 762.204833984375, 714.86767578125, -1065.250244140625, 143.30792236328125, 200.49349975585938, -218.44012451171875, -1066.031982421875, 1776.976318359375, 359.4669494628906, 98.32383728027344, 235.11495971679688, 499.8456726074219, 161.18942260742188, -440.3984375, 331.9354248046875, 314.7726135253906, 931.48583984375, -357.7216491699219, 364.6845703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000620.npy"}
|
|
{"epoch": 0.9104258443465492, "step": 621, "batch_size": 64, "mean": 551.19140625, "std": 720.2650146484375, "min": -1603.9794921875, "p10": -243.41492462158197, "median": 507.34156799316406, "p90": 1408.5875122070318, "max": 3010.297119140625, "pos_frac": 0.84375, "sample": [661.6509399414062, -429.9609375, -757.996826171875, -192.18849182128906, 1598.338623046875, 20.074562072753906, 123.82413482666016, 675.9281616210938, 646.503173828125, 940.5148315429688, 862.5390014648438, 327.1602783203125, 315.1707763671875, 878.04443359375, 210.24163818359375, -498.0923767089844, -265.3691101074219, 891.2470092773438, -1603.9794921875, 193.21368408203125, 1533.9583740234375, 190.52667236328125, 952.2142333984375, 536.5043334960938, 1200.7481689453125, 3010.297119140625, 170.04159545898438, 1072.5943603515625, 1065.807861328125, -63.88876724243164, 1297.1072998046875, 796.263671875, 906.8276977539062, 304.30218505859375, 856.4963989257812, 1726.69091796875, 178.67926025390625, 721.7078857421875, 427.5782165527344, 478.1788024902344, -119.95750427246094, 1265.140625, -281.5199890136719, 610.032470703125, 210.15872192382812, 158.7706298828125, 1456.36474609375, 353.5538330078125, 1256.343017578125, 1202.2518310546875, -1032.6812744140625, 368.86798095703125, 83.68367767333984, 133.47186279296875, 255.2814178466797, 641.0772705078125, 931.0358276367188, 1654.3448486328125, 244.25308227539062, 339.21136474609375, 1716.045166015625, 934.14501953125, 725.476806640625, 211.39739990234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000621.npy"}
|
|
{"epoch": 0.9118942731277533, "step": 622, "batch_size": 64, "mean": 397.30218505859375, "std": 627.1004638671875, "min": -1102.440185546875, "p10": -177.88167114257806, "median": 386.6011657714844, "p90": 979.6090881347659, "max": 2692.66552734375, "pos_frac": 0.8125, "sample": [669.7999877929688, 335.9076232910156, -210.07736206054688, 403.70574951171875, 574.5360717773438, 388.9584045410156, -725.183837890625, 387.4468994140625, 2488.486328125, 560.0323486328125, 65.8954849243164, -401.10552978515625, 107.91224670410156, -317.22601318359375, -29.131134033203125, 561.6260986328125, 199.87696838378906, 385.75543212890625, 84.83425903320312, 192.40565490722656, 1180.8992919921875, -65.97052001953125, 615.7379150390625, 2692.66552734375, 791.4605102539062, 69.85116577148438, 553.1773681640625, 736.13330078125, 175.16615295410156, -1022.949951171875, 208.7265625, 17.2222900390625, 1003.6431274414062, 149.99452209472656, 275.40362548828125, 923.5296630859375, 451.939697265625, -513.7972412109375, 1009.1773071289062, 525.370849609375, 865.137451171875, 836.6810302734375, 19.122962951660156, 647.726806640625, 237.00173950195312, -64.25128173828125, 428.8727111816406, -102.75839233398438, 57.72926330566406, 314.1024169921875, 1451.3753662109375, 532.0452270507812, 721.782470703125, 646.331787109375, 581.5738525390625, 732.579833984375, -1102.440185546875, -5.143013000488281, 126.61445617675781, 1460.7012939453125, 660.6132202148438, 606.171630859375, 20.65860366821289, 253.27337646484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000622.npy"}
|
|
{"epoch": 0.9133627019089574, "step": 623, "batch_size": 64, "mean": 313.7362060546875, "std": 612.8697509765625, "min": -1067.54248046875, "p10": -257.08003234863276, "median": 226.86019897460938, "p90": 1068.154351806641, "max": 2710.0458984375, "pos_frac": 0.71875, "sample": [-11.58685302734375, 231.39639282226562, -809.7929077148438, 1368.300048828125, -95.6932373046875, -53.17536544799805, 685.0505981445312, 158.72781372070312, 787.2691040039062, 34.2000732421875, 1438.8984375, 267.86474609375, 597.5066528320312, 178.9045867919922, 656.8507690429688, -465.1283874511719, 707.591064453125, 331.35394287109375, -226.4786376953125, 50.5767822265625, -150.65379333496094, 443.9722900390625, 1227.8953857421875, -165.7896728515625, -1067.54248046875, 130.8037109375, 348.0404052734375, -690.86962890625, 423.011474609375, 602.7999877929688, -270.1949157714844, 499.5755310058594, -129.9547119140625, -39.925697326660156, -170.75299072265625, 95.37477111816406, 958.4530639648438, 690.3949584960938, 942.1585693359375, 200.79452514648438, 61.12550354003906, -589.135986328125, 355.4526062011719, 1360.4090576171875, 57.612335205078125, -38.51042556762695, 2710.0458984375, 518.9251098632812, -40.12003707885742, -384.6198425292969, 1115.169189453125, 589.9215087890625, 285.83111572265625, 380.73602294921875, 222.32400512695312, 1765.2451171875, 85.152099609375, 87.89915466308594, 277.23468017578125, 6.360485076904297, 513.1676025390625, 275.033935546875, 720.700439453125, 32.9295654296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000623.npy"}
|
|
{"epoch": 0.9148311306901615, "step": 624, "batch_size": 64, "mean": 467.7041015625, "std": 573.8270874023438, "min": -1199.674560546875, "p10": -218.0212677001953, "median": 498.77928161621094, "p90": 1192.6002807617188, "max": 1461.87744140625, "pos_frac": 0.828125, "sample": [585.7987060546875, -374.06890869140625, 702.032470703125, 360.522705078125, 497.08648681640625, 1006.6058349609375, 1187.0516357421875, 1461.87744140625, 1376.560546875, 196.49468994140625, 186.36328125, 10.72601318359375, 126.90152740478516, 1194.978271484375, 950.644287109375, 8.2674560546875, 584.8799438476562, 741.2752075195312, 1217.859375, -1103.3006591796875, 75.89552307128906, 234.1411590576172, 1.1917800903320312, 908.763427734375, 415.2911682128906, 211.2344970703125, 906.5816040039062, 800.4738159179688, 1092.9976806640625, -223.98196411132812, 167.88465881347656, 1092.6793212890625, -204.11297607421875, 285.0674133300781, -312.58551025390625, 941.7162475585938, 391.3602294921875, -93.78302001953125, 914.7318115234375, 410.3443603515625, -1199.674560546875, 78.07425689697266, -66.598876953125, 720.5785522460938, 979.377197265625, 1002.2903442382812, 280.92193603515625, 682.9310302734375, 1016.7503662109375, 500.4720764160156, 117.05793762207031, -415.4503173828125, 8.81859016418457, 1226.3748779296875, 603.2825927734375, 711.751220703125, -321.3438415527344, 659.4384765625, -108.36262512207031, 652.3182373046875, 1286.4886474609375, 161.50006103515625, 1339.92822265625, 1081.68994140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000624.npy"}
|
|
{"epoch": 0.9162995594713657, "step": 625, "batch_size": 64, "mean": 524.8773193359375, "std": 563.9940185546875, "min": -447.3465881347656, "p10": -23.857870483398415, "median": 437.4485778808594, "p90": 1236.61767578125, "max": 2252.187744140625, "pos_frac": 0.875, "sample": [58.311485290527344, 148.68057250976562, -327.8778381347656, 939.1499633789062, 357.56622314453125, 37.13433837890625, 819.707763671875, 1175.654541015625, 1045.120849609375, 1000.3526611328125, 39.681243896484375, 618.161376953125, 275.26177978515625, 62.00004577636719, 1982.22998046875, 630.3933715820312, 234.3680419921875, 107.14454650878906, 2252.187744140625, 474.72149658203125, 1945.6917724609375, 251.92994689941406, 91.27823638916016, 744.9859008789062, 155.88113403320312, 714.52783203125, 1767.0196533203125, -32.88374328613281, 331.97930908203125, 314.56903076171875, 412.17889404296875, 709.8387451171875, 490.5126647949219, -436.89031982421875, 523.8927612304688, -111.98355102539062, 84.9991226196289, 604.0260009765625, 296.2891540527344, 794.88818359375, 1247.3560791015625, 420.49066162109375, 1386.6279296875, 454.406494140625, 118.25514221191406, 475.7296447753906, -81.60154724121094, 473.19647216796875, 670.8212890625, 798.2130737304688, 106.83316802978516, -2.7975006103515625, 926.8648681640625, 359.89337158203125, 136.75660705566406, 141.91290283203125, 186.3415069580078, 495.1096496582031, 1211.5614013671875, -447.3465881347656, 733.5698852539062, -132.40548706054688, 1393.030029296875, 936.6465454101562], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000625.npy"}
|
|
{"epoch": 0.9177679882525698, "step": 626, "batch_size": 64, "mean": 284.48504638671875, "std": 644.917724609375, "min": -2501.293212890625, "p10": -322.6523956298827, "median": 224.28958892822266, "p90": 943.5091552734377, "max": 1912.6240234375, "pos_frac": 0.734375, "sample": [665.6715087890625, 61.000732421875, 465.3983154296875, 389.27191162109375, 177.16873168945312, 211.4592742919922, 723.5122680664062, 213.81149291992188, 879.4739379882812, -2501.293212890625, 1594.0601806640625, 234.76768493652344, 1531.9600830078125, 1912.6240234375, -587.6716918945312, 176.20993041992188, 254.373291015625, -369.6172180175781, -5.45361328125, -156.28631591796875, 15.78875732421875, 210.8525848388672, 577.7120361328125, 118.83306121826172, 72.60978698730469, 1.3457489013671875, 156.00885009765625, -618.8582763671875, 467.6172180175781, 634.0107421875, -126.70097351074219, 648.39013671875, -535.9703369140625, -759.9549560546875, 608.0884399414062, 1191.396240234375, 759.9655151367188, 370.1361389160156, 334.4771728515625, 643.6995239257812, 558.4501342773438, -213.06781005859375, 970.9528198242188, 499.30328369140625, 1675.477294921875, 722.1942138671875, -104.69904327392578, 128.53976440429688, 124.40499877929688, 391.97027587890625, -116.2906494140625, 814.7535400390625, 330.1058349609375, 1100.085205078125, -19.073013305664062, 211.20664978027344, 662.6919555664062, 398.6949462890625, -717.765625, -93.15690612792969, 93.86589050292969, -122.7044906616211, 292.16302490234375, -20.947856903076172], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000626.npy"}
|
|
{"epoch": 0.9192364170337739, "step": 627, "batch_size": 64, "mean": 586.0703125, "std": 831.2222900390625, "min": -701.0098266601562, "p10": -149.28670654296874, "median": 430.69078063964844, "p90": 1477.503259277344, "max": 3805.1328125, "pos_frac": 0.75, "sample": [642.0808715820312, 267.16204833984375, 1143.2010498046875, 484.4393615722656, -183.14535522460938, -48.41560363769531, 105.29297637939453, 700.6536865234375, 576.71484375, 494.1810607910156, 1336.0003662109375, -533.0965576171875, 235.6429443359375, 459.9217224121094, -107.3928451538086, 981.7846069335938, -149.24539184570312, 719.9708862304688, 979.3817138671875, 1192.9957275390625, -670.790283203125, -149.30441284179688, 659.0436401367188, -18.535017013549805, 1820.363037109375, 171.28018188476562, 426.4788513183594, 2571.013671875, 1022.7535400390625, 384.1142578125, 1917.981201171875, 74.11483764648438, 557.4380493164062, 1279.71875, 552.3609619140625, 1747.1712646484375, 1431.7041015625, 162.95584106445312, 531.8719482421875, -140.33590698242188, 110.8882827758789, -53.715179443359375, 3256.9130859375, 1497.1314697265625, 706.3056640625, -237.70535278320312, 3805.1328125, -133.5091552734375, 467.7856140136719, -77.25833129882812, 1040.12451171875, 280.5548400878906, 911.6541748046875, -701.0098266601562, 266.554443359375, 314.54229736328125, 434.9027099609375, 238.5137481689453, -401.0476379394531, 1321.8115234375, -67.4804458618164, 273.4107666015625, 299.37969970703125, 325.09576416015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000627.npy"}
|
|
{"epoch": 0.920704845814978, "step": 628, "batch_size": 64, "mean": 442.3868408203125, "std": 661.0192260742188, "min": -911.684814453125, "p10": -283.934455871582, "median": 368.77464294433594, "p90": 1341.9218872070312, "max": 2209.055908203125, "pos_frac": 0.703125, "sample": [342.4196472167969, -21.788375854492188, 423.7236328125, 1356.9688720703125, 1465.46337890625, 164.62246704101562, 712.8681030273438, 197.52154541015625, -254.4638214111328, -7.424009323120117, 188.70956420898438, 135.209228515625, -911.684814453125, 1049.29345703125, 1040.130126953125, 1223.52978515625, -91.84088134765625, -237.9674072265625, 1685.228271484375, 487.54132080078125, 2209.055908203125, 566.8507080078125, 244.00912475585938, -192.40087890625, -61.438316345214844, -296.5647277832031, 470.15704345703125, 855.9994506835938, -528.7650146484375, -472.7088317871094, 1000.4214477539062, 296.7882080078125, 802.5504760742188, 1465.199951171875, 1345.8887939453125, 901.79150390625, 434.8383483886719, 758.8524780273438, 170.6616668701172, 198.04751586914062, -210.42593383789062, 568.7161254882812, 467.850341796875, 76.44306182861328, 1574.789306640625, 1282.724853515625, 1329.61181640625, 225.55084228515625, -411.14569091796875, -775.4671630859375, -142.17019653320312, -207.03175354003906, 1332.665771484375, 837.87744140625, -675.5681762695312, -43.18122100830078, 1084.722412109375, 395.129638671875, 497.6628112792969, 164.14093017578125, 1195.424560546875, 550.0558471679688, -71.85688781738281, 148.94419860839844], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000628.npy"}
|
|
{"epoch": 0.922173274596182, "step": 629, "batch_size": 64, "mean": 406.26995849609375, "std": 548.5282592773438, "min": -1083.5233154296875, "p10": -222.77543029785153, "median": 416.4658203125, "p90": 1199.638122558594, "max": 1766.9041748046875, "pos_frac": 0.8125, "sample": [220.1490478515625, 1266.464599609375, -350.6226501464844, -560.2236938476562, 418.33935546875, 411.271484375, 359.60760498046875, 791.808349609375, 1326.0274658203125, 567.6954956054688, 535.8810424804688, 824.3603515625, -98.67996978759766, 434.20501708984375, 1231.041748046875, -125.52436065673828, 163.58843994140625, 60.262699127197266, 36.950111389160156, 606.8677978515625, 31.689376831054688, 1132.7301025390625, 483.5870056152344, -178.58656311035156, 156.65834045410156, 556.0630493164062, -848.9744262695312, -148.9613037109375, 1026.03173828125, 729.7093505859375, 85.42191314697266, 845.4683227539062, 1403.929443359375, 1766.9041748046875, 290.3919372558594, 163.5512237548828, -1083.5233154296875, 1241.16162109375, 1045.0537109375, -270.05352783203125, -241.71351623535156, -596.8275756835938, 354.19732666015625, 381.72137451171875, 589.053466796875, 1228.31298828125, 299.330078125, 520.8953857421875, 681.3818969726562, 7.193960189819336, 843.946533203125, 321.6199951171875, 82.70098876953125, 665.515625, 220.63381958007812, 496.28509521484375, 19.081369400024414, 661.6664428710938, 972.6351318359375, 597.5764770507812, 431.1988525390625, 627.7608642578125, 414.59228515625, -125.20437622070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000629.npy"}
|
|
{"epoch": 0.9236417033773862, "step": 630, "batch_size": 64, "mean": 232.0568084716797, "std": 694.9452514648438, "min": -1352.0950927734375, "p10": -474.1148376464844, "median": 161.2292709350586, "p90": 995.3790466308594, "max": 3376.850830078125, "pos_frac": 0.6875, "sample": [1167.38916015625, -1103.1722412109375, -548.8484497070312, 347.5641784667969, -195.34454345703125, 740.5198364257812, 10.983573913574219, -190.05035400390625, 2307.14501953125, -210.1197509765625, 659.2659912109375, -117.01942443847656, 254.33547973632812, 1019.8630981445312, 756.8065795898438, -1352.0950927734375, 145.21450805664062, -576.4849853515625, 302.21368408203125, 211.70103454589844, 456.1562194824219, -218.44712829589844, 145.39682006835938, 1364.6103515625, -145.3109893798828, 3376.850830078125, 39.83903503417969, 982.38525390625, 151.40293884277344, -219.4566192626953, 33.58494567871094, -396.26580810546875, 292.9401550292969, -527.8054809570312, 363.5702209472656, -474.6610412597656, -221.1196746826172, 431.7424621582031, 159.5906524658203, 402.90716552734375, 162.66357421875, -260.24969482421875, 293.7672119140625, -538.56201171875, 172.598876953125, 376.28900146484375, 133.5665283203125, 284.6669006347656, 643.4732666015625, -48.999855041503906, 65.91706848144531, 1000.9478149414062, 634.3969116210938, 159.7949676513672, -338.40216064453125, 58.74470520019531, 344.0293273925781, 181.3184051513672, 636.75439453125, -472.8403625488281, 1165.97021484375, 215.6890869140625, 346.23638916015625, 6.087522506713867], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000630.npy"}
|
|
{"epoch": 0.9251101321585903, "step": 631, "batch_size": 64, "mean": 365.2381591796875, "std": 647.4708251953125, "min": -1795.395751953125, "p10": -395.4131805419922, "median": 319.0447082519531, "p90": 1112.1002075195313, "max": 1989.2781982421875, "pos_frac": 0.71875, "sample": [1104.9781494140625, 983.66259765625, -407.8382263183594, 299.20074462890625, -393.3685302734375, 433.98089599609375, -107.86325073242188, -133.05740356445312, -237.99551391601562, -308.73828125, 1670.095703125, 1246.92578125, -467.7764587402344, 471.5436096191406, -543.2791137695312, 183.5072479248047, 501.4818115234375, 869.4251708984375, 42.67719268798828, -396.2894592285156, 139.4055633544922, -16.990262985229492, 263.9694519042969, -693.9998168945312, 1006.059814453125, 4.745273590087891, 151.66256713867188, -165.16844177246094, 781.5924682617188, 769.769287109375, -1795.395751953125, 516.9493408203125, 1989.2781982421875, 720.25341796875, 383.32745361328125, 740.2631225585938, 274.2832336425781, -167.5368194580078, -124.61380767822266, 1113.259521484375, 226.17752075195312, 539.938232421875, 1532.6239013671875, 860.2294311523438, 610.529296875, -787.8186645507812, 1109.3951416015625, 338.888671875, 1461.3623046875, 458.83380126953125, 72.63610076904297, 638.1190795898438, -156.42926025390625, 393.4567565917969, 233.52557373046875, 202.41552734375, 1230.78271484375, 630.8908081054688, 937.740234375, -100.07820892333984, 264.503173828125, 959.6199951171875, 234.78305053710938, 780.7291870117188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000631.npy"}
|
|
{"epoch": 0.9265785609397944, "step": 632, "batch_size": 64, "mean": 388.82421875, "std": 636.9307250976562, "min": -1301.375, "p10": -324.9979248046875, "median": 373.52227783203125, "p90": 1099.9728637695314, "max": 2343.582275390625, "pos_frac": 0.75, "sample": [708.7247314453125, -713.7784423828125, 295.71728515625, 380.2610778808594, 632.6234741210938, -1301.375, 1080.9833984375, 1108.1112060546875, 1328.4949951171875, 1012.2675170898438, 787.8331909179688, 682.62255859375, 226.17449951171875, 896.468994140625, 267.7918395996094, 222.80953979492188, 580.2607421875, 386.66448974609375, 33.690792083740234, 9.509498596191406, 1572.132568359375, 366.7834777832031, 727.041748046875, 508.10052490234375, -480.86309814453125, 1155.380859375, 614.13525390625, -336.7883605957031, 444.0684509277344, -177.73187255859375, -1150.061279296875, 324.2839660644531, 1032.615478515625, 1036.6854248046875, 588.89306640625, 786.3016967773438, 960.789306640625, 759.0704956054688, 323.4460754394531, -297.4869079589844, 1284.7197265625, -109.37189483642578, -209.71234130859375, -131.69674682617188, 695.452880859375, 302.58062744140625, 667.3423461914062, 138.21392822265625, 1139.28125, 2343.582275390625, 1016.2818603515625, -146.67379760742188, -438.413330078125, 166.951171875, 154.0819549560547, -230.9341278076172, 403.6037902832031, 290.5865173339844, -652.01123046875, 81.47454833984375, 93.19248962402344, -230.91603088378906, -124.91793060302734, 999.3990478515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000632.npy"}
|
|
{"epoch": 0.9280469897209985, "step": 633, "batch_size": 64, "mean": 287.41412353515625, "std": 583.1665649414062, "min": -1075.1495361328125, "p10": -393.2981475830078, "median": 267.5705108642578, "p90": 1143.4038208007812, "max": 1566.03173828125, "pos_frac": 0.71875, "sample": [-399.6371765136719, 1324.922607421875, 763.9129638671875, -671.2568359375, 416.79815673828125, 369.6087646484375, -424.02178955078125, -112.05386352539062, 1216.975830078125, 77.66405487060547, 268.50018310546875, 67.92047119140625, 400.59326171875, -1075.1495361328125, 741.6509399414062, 89.95240020751953, 187.07518005371094, 1149.0831298828125, 355.2119445800781, -233.98214721679688, -153.40252685546875, 331.2972106933594, 313.7763671875, -40.12781524658203, 705.556640625, 165.03945922851562, -12.243635177612305, -134.08319091796875, -935.3988037109375, 1154.5899658203125, 371.23187255859375, 250.73040771484375, 1130.152099609375, 424.9729309082031, 750.8865966796875, -82.75508117675781, -255.76522827148438, -1029.547607421875, -378.507080078125, 175.6746826171875, 1385.361083984375, 475.6354675292969, 185.39019775390625, -797.5985107421875, 590.1078491210938, 646.9515380859375, 558.034912109375, 1566.03173828125, 1041.438232421875, 1079.3446044921875, 149.75741577148438, 266.6408386230469, -246.40159606933594, 892.1798706054688, 326.93023681640625, -24.717025756835938, 154.1809844970703, 367.25604248046875, 1350.73876953125, 88.86083984375, 607.571533203125, 72.53652954101562, 107.01795196533203, 285.408935546875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000633.npy"}
|
|
{"epoch": 0.9295154185022027, "step": 634, "batch_size": 64, "mean": 242.82333374023438, "std": 587.0032958984375, "min": -975.2646484375, "p10": -373.63626098632807, "median": 159.19013214111328, "p90": 1035.0707519531254, "max": 2238.637939453125, "pos_frac": 0.625, "sample": [192.82374572753906, 878.2640380859375, -280.0065612792969, 1560.354736328125, -530.7860717773438, -23.208009719848633, 29.193056106567383, 233.71298217773438, 302.3634948730469, 1251.4669189453125, 34.14752197265625, 294.26019287109375, -90.49866485595703, 293.78326416015625, 642.5752563476562, 146.96920776367188, 243.02496337890625, -238.98065185546875, -717.863525390625, 956.121826171875, 1099.9158935546875, -146.8829803466797, -123.54847717285156, 102.42974853515625, -36.077545166015625, 54.26795196533203, 152.23117065429688, 691.4075317382812, 512.638916015625, 47.61519241333008, 2238.637939453125, 358.5101318359375, 635.416015625, -975.2646484375, 1068.906005859375, 1174.9642333984375, 608.669677734375, -329.2960205078125, -392.63922119140625, 241.53640747070312, 737.3475952148438, -150.05726623535156, -82.76526641845703, -933.8589477539062, 166.1490936279297, -533.0383911132812, -4.033832550048828, -501.087158203125, -190.6705322265625, 862.4177856445312, -312.6827392578125, 731.931396484375, 388.130859375, 1325.71728515625, 365.2767028808594, 88.31852722167969, 270.1934509277344, 650.3116455078125, -93.08727264404297, 254.08511352539062, -7.48382568359375, -96.84611511230469, -257.5126647949219, 702.782958984375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000634.npy"}
|
|
{"epoch": 0.9309838472834068, "step": 635, "batch_size": 64, "mean": 351.2920837402344, "std": 629.7105712890625, "min": -1083.65625, "p10": -298.73904266357414, "median": 335.932861328125, "p90": 1013.8996582031253, "max": 2300.81298828125, "pos_frac": 0.71875, "sample": [872.781005859375, 101.19508361816406, 2300.81298828125, -197.8743896484375, 440.0773620605469, -28.081932067871094, 188.45526123046875, 1.5118522644042969, -11.581680297851562, 523.5579223632812, 41.93238067626953, 1546.396728515625, 16.360916137695312, 819.431884765625, -520.6024169921875, 362.71551513671875, 555.115966796875, -30.797454833984375, 772.253173828125, 27.147994995117188, 410.03192138671875, 185.6389617919922, 741.6790771484375, 928.952880859375, 355.09783935546875, 108.4185562133789, 9.117645263671875, 436.02215576171875, -220.11585998535156, 1780.61181640625, -108.42487335205078, 380.44342041015625, 381.71636962890625, 1211.9698486328125, 772.3646850585938, -136.8638916015625, 92.51506805419922, -21.664936065673828, 454.992919921875, 1050.305419921875, -167.48182678222656, 894.5594482421875, 928.2599487304688, 355.5432434082031, -745.55224609375, -155.01718139648438, 642.2408447265625, -50.67176055908203, -332.4346923828125, 605.2857666015625, -478.39483642578125, -400.77227783203125, -1083.65625, 847.9536743164062, 1176.697021484375, 272.4023742675781, 442.9036560058594, 698.8607177734375, -913.2926025390625, 2110.23193359375, 187.76980590820312, 459.6087951660156, 316.76788330078125, 277.2659912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000635.npy"}
|
|
{"epoch": 0.9324522760646109, "step": 636, "batch_size": 64, "mean": 425.38995361328125, "std": 661.10986328125, "min": -903.7350463867188, "p10": -239.64664764404296, "median": 326.12953186035156, "p90": 976.4569091796875, "max": 3726.567626953125, "pos_frac": 0.75, "sample": [44.77899169921875, -362.2305908203125, 90.21383666992188, -116.13717651367188, 286.3384704589844, -423.1743469238281, 392.36761474609375, 360.00811767578125, 470.7170715332031, 10.112205505371094, 797.005126953125, -362.22955322265625, 355.3615417480469, 1167.795654296875, 23.166831970214844, 977.0690307617188, -92.94189453125, 2161.21337890625, 1250.12158203125, 490.52069091796875, 254.86647033691406, 1009.8402709960938, -70.89041137695312, -277.722900390625, 790.8763427734375, 296.3069152832031, 888.352783203125, 411.27044677734375, 146.4088592529297, 3726.567626953125, 17.763687133789062, -222.3966827392578, 874.7587280273438, 1251.3564453125, -313.9031677246094, 964.0804443359375, 973.1080322265625, 372.6677551269531, 530.5968017578125, -221.14895629882812, 655.032958984375, 211.42225646972656, 822.4559326171875, 798.5870361328125, 733.1190795898438, 292.78887939453125, 833.55078125, -5.199310302734375, 289.49298095703125, -247.03948974609375, 184.09181213378906, -18.516387939453125, -903.7350463867188, 396.6439208984375, 264.3058776855469, -195.6703643798828, 975.0286254882812, 721.5850830078125, 290.3109436035156, 449.39111328125, -8.082168579101562, 296.89752197265625, 534.9832153320312, 930.6765747070312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000636.npy"}
|
|
{"epoch": 0.933920704845815, "step": 637, "batch_size": 64, "mean": 459.3734130859375, "std": 641.8457641601562, "min": -996.6302490234375, "p10": -90.57667846679686, "median": 322.39808654785156, "p90": 1401.0846923828126, "max": 2329.317138671875, "pos_frac": 0.828125, "sample": [113.15288543701172, 60.729820251464844, 557.363525390625, -8.996467590332031, 52.43899917602539, 1008.6630859375, 312.9375, 70.486083984375, 642.9221801757812, 1292.425048828125, 720.6455688476562, -941.8338012695312, 92.93870544433594, 138.34805297851562, -186.0057830810547, 596.5505981445312, 878.5612182617188, 1401.51513671875, 1029.4764404296875, 615.3289794921875, 1613.27197265625, 330.05126953125, 206.45443725585938, 2329.317138671875, 109.00942993164062, 466.39691162109375, -358.7233581542969, 13.35775375366211, 325.1553649902344, -34.401947021484375, 326.5348815917969, -209.14035034179688, -151.09600830078125, 616.14306640625, 421.5556335449219, -98.70878601074219, 319.64080810546875, 695.1168823242188, 819.5908203125, 221.40643310546875, 561.695556640625, 429.10931396484375, 181.64767456054688, 922.015869140625, 638.037109375, 577.1876831054688, 1400.080322265625, 65.06647491455078, 1762.8887939453125, -996.6302490234375, 1134.9346923828125, 60.87742614746094, 287.3077392578125, 1633.614990234375, 343.1349182128906, 62.15275573730469, 74.64381408691406, 1470.8905029296875, -71.60176086425781, 2254.898681640625, 14.874252319335938, 106.62428283691406, 139.32470703125, -61.45636749267578], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000637.npy"}
|
|
{"epoch": 0.9353891336270191, "step": 638, "batch_size": 64, "mean": 358.84271240234375, "std": 700.8246459960938, "min": -3408.7744140625, "p10": -85.08853530883788, "median": 275.27667236328125, "p90": 1157.8909423828127, "max": 1784.43212890625, "pos_frac": 0.765625, "sample": [1231.2763671875, -680.7144165039062, -3408.7744140625, 979.8616333007812, 284.627197265625, 152.23313903808594, 48.441070556640625, 39.114013671875, 1134.5301513671875, 342.8979797363281, 725.3726806640625, 1364.149169921875, 82.52488708496094, 734.7677612304688, 263.48486328125, 508.412353515625, 612.7484130859375, -528.4863891601562, 514.3150634765625, 35.85463333129883, 388.9867858886719, 43.54631805419922, -211.2803955078125, 134.87298583984375, 281.6094055175781, 913.8837890625, 1353.2113037109375, -18.048480987548828, 684.570068359375, 252.64041137695312, 755.3021850585938, 252.51361083984375, 605.0286254882812, -44.039024353027344, -104.01365661621094, 750.6278076171875, 932.1982421875, -54.718997955322266, 328.6360778808594, -347.6410827636719, -49.98826599121094, 499.8889465332031, 268.9439392089844, 1784.43212890625, -14.518180847167969, -27.1197452545166, 784.2496948242188, 106.07221984863281, 596.9769897460938, 1127.3509521484375, -50.757415771484375, -65.02081298828125, 1020.0609130859375, 474.61669921875, 147.44265747070312, 67.48330688476562, 67.3558120727539, 57.544288635253906, 786.0110473632812, 1167.9027099609375, -93.6889877319336, 95.34752655029297, 1561.8126220703125, 1319.0125732421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000638.npy"}
|
|
{"epoch": 0.9368575624082232, "step": 639, "batch_size": 64, "mean": 165.82308959960938, "std": 706.2058715820312, "min": -1354.9571533203125, "p10": -690.8281677246093, "median": 161.29566955566406, "p90": 1059.0478271484376, "max": 1679.7008056640625, "pos_frac": 0.609375, "sample": [1521.30224609375, -910.359619140625, -472.98724365234375, 1179.149169921875, 952.101806640625, -433.15203857421875, 263.9401550292969, 399.4985656738281, -573.626953125, 887.0599975585938, 1657.321533203125, -33.182987213134766, 350.5717468261719, 651.2546997070312, -1052.0428466796875, 304.5664367675781, -454.1153564453125, -376.100341796875, 113.57113647460938, 474.6794738769531, 643.7440185546875, -1298.48681640625, 879.2092895507812, 589.61572265625, -636.8079833984375, -1354.9571533203125, -1125.3299560546875, 263.0681457519531, 111.48184967041016, 108.24702453613281, 629.204345703125, 559.2329711914062, -3.1359329223632812, -608.4450073242188, 1074.36767578125, 1216.7247314453125, -357.3814697265625, 844.694091796875, -445.9742431640625, -30.887123107910156, 55.788841247558594, -110.12478637695312, 683.6160888671875, 382.2156982421875, 1679.7008056640625, -862.0282592773438, -118.38431549072266, 72.35140228271484, -320.86456298828125, -386.98809814453125, 167.72328186035156, 785.4510498046875, 312.3291015625, 1023.301513671875, -713.9796752929688, -433.6787414550781, -458.7596740722656, 154.86805725097656, 144.93397521972656, 678.5560302734375, 312.3809509277344, 1298.992431640625, 583.8072509765625, 173.83493041992188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000639.npy"}
|
|
{"epoch": 0.9383259911894273, "step": 640, "batch_size": 64, "mean": 558.8187255859375, "std": 758.8751831054688, "min": -890.9835815429688, "p10": -198.78300781249996, "median": 410.15126037597656, "p90": 1372.1029418945313, "max": 2911.274658203125, "pos_frac": 0.78125, "sample": [763.6207885742188, 564.2176513671875, -460.0438232421875, -146.15489196777344, 159.17898559570312, 413.3760681152344, 296.24609375, 1335.90087890625, 1300.88330078125, 204.45391845703125, 1375.873779296875, 1574.2933349609375, 936.6246337890625, 82.84634399414062, -266.4834289550781, 498.9050598144531, 897.1806030273438, -114.3709716796875, -79.38874816894531, 1334.572021484375, -776.4193115234375, 984.0185546875, 418.82989501953125, 681.8533935546875, -212.86141967773438, -890.9835815429688, 139.52963256835938, 406.92645263671875, 2911.274658203125, 888.2738037109375, 573.1224365234375, 1035.1285400390625, -702.6045532226562, 583.1723022460938, 2640.999267578125, 323.61260986328125, -165.93338012695312, 254.51942443847656, 61.5882568359375, 365.8349609375, -83.26274108886719, 251.26564025878906, 36.49101257324219, 1258.0743408203125, 1028.8359375, 188.59201049804688, 965.3036499023438, 677.965576171875, -465.621337890625, 1363.3043212890625, 2066.86767578125, 352.9799499511719, 26.922515869140625, 1183.0853271484375, -0.9454498291015625, 774.2893676757812, 397.886962890625, 797.2839965820312, 77.41963195800781, 755.0075073242188, 1407.7293701171875, 2369.18896484375, -145.48143005371094, 289.6014099121094], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000640.npy"}
|
|
{"epoch": 0.9397944199706314, "step": 641, "batch_size": 64, "mean": 358.13702392578125, "std": 724.6242065429688, "min": -1272.158935546875, "p10": -498.09977416992183, "median": 345.08201599121094, "p90": 1083.1233276367188, "max": 2912.272705078125, "pos_frac": 0.78125, "sample": [136.86544799804688, 322.5809326171875, 567.4153442382812, 1043.438232421875, -518.7921142578125, -163.36282348632812, 351.6484375, 1527.9464111328125, 429.9732666015625, 95.37043762207031, -1081.47216796875, 484.0974426269531, 18.287002563476562, -435.8550720214844, 2912.272705078125, -506.99066162109375, -420.37347412109375, -477.3543701171875, 39.83299255371094, 933.4498291015625, 11.688323974609375, 116.91947174072266, 100.55841064453125, 615.4165649414062, 229.76211547851562, 582.9776611328125, -1211.9385986328125, 934.4119873046875, 587.7705078125, 47.59782409667969, 879.7605590820312, 505.5023193359375, 660.2635498046875, 129.2554931640625, 85.57959747314453, 49.75997543334961, 1265.6295166015625, 331.1490478515625, 593.845703125, 613.6611328125, 158.5316162109375, 34.62896728515625, -72.1594009399414, -662.5099487304688, 212.99737548828125, 833.1632690429688, -1272.158935546875, 1100.1312255859375, 914.0247192382812, 957.2174682617188, 2243.492431640625, 785.1492309570312, -1035.741943359375, 759.857177734375, -30.100257873535156, 365.8275451660156, 647.4083251953125, -261.87835693359375, 506.29351806640625, 338.5155944824219, 950.56982421875, 1207.3187255859375, 656.139404296875, 1195.50439453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000641.npy"}
|
|
{"epoch": 0.9412628487518355, "step": 642, "batch_size": 64, "mean": 439.28436279296875, "std": 889.5961303710938, "min": -1764.3668212890625, "p10": -308.2948364257812, "median": 329.50494384765625, "p90": 1275.9433837890629, "max": 4663.4541015625, "pos_frac": 0.75, "sample": [101.24665832519531, 565.1837158203125, 726.5223388671875, 559.495849609375, 269.1412353515625, -1764.3668212890625, 771.0169677734375, 266.8716735839844, -68.94979095458984, -539.7617797851562, -121.67346954345703, 101.01754760742188, 1416.5380859375, -320.1402587890625, 863.8590087890625, 1002.4963989257812, 519.7920532226562, 753.4541015625, -833.7213745117188, 45.28493881225586, 240.16635131835938, -171.74066162109375, 361.270751953125, 36.18821716308594, 289.02569580078125, 750.3217163085938, -101.7890396118164, 233.62353515625, 96.57545471191406, 420.1101379394531, 891.9217529296875, 662.2723388671875, 712.5255126953125, 2423.8994140625, -243.58523559570312, 83.70883178710938, 371.5128173828125, 736.7763671875, -605.797119140625, 232.65347290039062, 847.3750610351562, 165.9320068359375, 490.7391052246094, 1309.655517578125, -280.655517578125, 1197.28173828125, 815.0665893554688, 1607.47802734375, -205.72169494628906, 5.022705078125, 4663.4541015625, 1021.7982788085938, -631.5464477539062, 469.982177734375, -209.18467712402344, -168.63856506347656, 363.90673828125, 2424.236328125, 44.158172607421875, -1047.0587158203125, 504.8916015625, 1726.3203125, 297.7391357421875, 969.0184936523438], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000642.npy"}
|
|
{"epoch": 0.9427312775330396, "step": 643, "batch_size": 64, "mean": 344.084228515625, "std": 546.4957885742188, "min": -762.9945068359375, "p10": -397.6305938720703, "median": 310.5269775390625, "p90": 1000.9250305175782, "max": 1817.4775390625, "pos_frac": 0.78125, "sample": [312.506591796875, 383.2859802246094, 159.87477111816406, 910.3665771484375, 809.7388305664062, 634.267822265625, 525.1923828125, -88.00188446044922, 245.50270080566406, 851.4468383789062, 16.672313690185547, -680.4737548828125, 341.4474182128906, 295.9775695800781, -158.32130432128906, 913.26171875, 810.2815551757812, 1010.0427856445312, 1097.631103515625, 479.6331481933594, 168.44593811035156, 40.71665573120117, -312.8045959472656, 187.80667114257812, 34.18904495239258, 180.16607666015625, -762.9945068359375, 676.3914184570312, 443.8004455566406, 974.4320068359375, -465.0970458984375, 276.2764587402344, 308.54736328125, 218.57861328125, 956.776123046875, 1200.81689453125, 417.6880187988281, -442.93408203125, 730.4409790039062, 369.3453063964844, -477.56341552734375, 1793.304931640625, 1079.335693359375, 341.31024169921875, 656.5663452148438, 94.39026641845703, 2.8523731231689453, 428.5230712890625, 1817.4775390625, 55.407474517822266, 979.6502685546875, 645.8768310546875, 738.2200927734375, -325.95343017578125, 1060.0604248046875, 41.442474365234375, -373.29168701171875, -408.0615539550781, 417.08013916015625, 26.771881103515625, -155.9517822265625, -117.64608764648438, -512.5817260742188, 143.2470703125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000643.npy"}
|
|
{"epoch": 0.9441997063142438, "step": 644, "batch_size": 64, "mean": 358.000244140625, "std": 727.659912109375, "min": -1717.1717529296875, "p10": -552.1825927734375, "median": 232.5782012939453, "p90": 1155.8793457031252, "max": 2502.4169921875, "pos_frac": 0.734375, "sample": [2502.4169921875, 52.553550720214844, 906.4190673828125, -1717.1717529296875, 487.6593017578125, 387.67034912109375, -758.114013671875, 31.230606079101562, 963.128662109375, 388.0243835449219, 43.883365631103516, -29.302902221679688, 65.28424835205078, 469.6314697265625, 115.56736755371094, 639.9632568359375, 175.91107177734375, -20.37237548828125, 260.8763732910156, -567.353759765625, 162.88796997070312, 1964.0245361328125, 59.84815979003906, -591.083984375, 779.6832275390625, 915.6143798828125, -1.8432769775390625, 495.89801025390625, -273.10040283203125, 1590.9627685546875, -33.87328338623047, -346.1415710449219, 777.79296875, -62.64909362792969, 76.14906311035156, -36.56415557861328, 1175.30419921875, 705.930419921875, 715.5520629882812, 285.019775390625, 126.24349212646484, -576.7393798828125, 526.3230590820312, 56.97295379638672, 1098.829345703125, 561.7139282226562, -593.6614379882812, -294.46588134765625, 118.96470642089844, 1054.720947265625, 1612.9195556640625, 820.426025390625, 96.13197326660156, -1086.859130859375, 1072.9501953125, 1110.5546875, 369.58184814453125, 1328.325927734375, 25.787445068359375, 435.0265197753906, 653.5620727539062, 1949.8919677734375, 204.280029296875, -516.783203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000644.npy"}
|
|
{"epoch": 0.9456681350954479, "step": 645, "batch_size": 64, "mean": 177.82760620117188, "std": 677.1175537109375, "min": -1249.979248046875, "p10": -700.8367065429687, "median": 93.66887664794922, "p90": 1070.8259887695312, "max": 2201.6015625, "pos_frac": 0.609375, "sample": [335.0145568847656, 1050.2314453125, -676.4718017578125, 931.6392211914062, -1249.979248046875, -878.084228515625, -669.883544921875, 39.734161376953125, 413.9723815917969, -157.89845275878906, -130.71913146972656, -41.10650634765625, 54.712852478027344, -7.993259429931641, -88.37411499023438, -347.93341064453125, 404.92364501953125, 301.6349792480469, 979.7998657226562, 1082.7174072265625, 663.625732421875, -388.283203125, 71.71514892578125, 38.68467712402344, 128.0531768798828, 64.82823181152344, 1079.6522216796875, -32.728919982910156, -729.074462890625, -1041.8594970703125, 189.26148986816406, 402.0859069824219, -711.27880859375, 1345.94091796875, 868.8745727539062, 24.045928955078125, 1880.068359375, 788.0162353515625, -1139.435546875, -634.7494506835938, 1306.0191650390625, -42.865692138671875, 115.62260437011719, 186.2506866455078, 397.2547607421875, 544.189208984375, 392.682861328125, 306.0013732910156, 228.4609375, 632.9100952148438, -62.48333740234375, -285.03021240234375, 1206.2705078125, 31.28954315185547, 387.3134765625, 2201.6015625, -97.60942840576172, 341.71575927734375, -864.195556640625, -117.1977310180664, 552.5585327148438, 202.6358184814453, -100.97557067871094, -294.83197021484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000645.npy"}
|
|
{"epoch": 0.947136563876652, "step": 646, "batch_size": 64, "mean": 305.16351318359375, "std": 579.70361328125, "min": -1013.6762084960938, "p10": -348.19539489746086, "median": 214.67088317871094, "p90": 1046.6956481933594, "max": 2064.737060546875, "pos_frac": 0.71875, "sample": [901.6470947265625, 1018.6847534179688, -146.54736328125, 1451.76708984375, 624.446044921875, 221.13087463378906, 499.10357666015625, 659.2590942382812, 910.496826171875, 26.151687622070312, 372.9458923339844, 503.9674072265625, -934.1474609375, -9.571380615234375, 1016.2304077148438, 121.10035705566406, -282.6957702636719, 286.54864501953125, 1058.7003173828125, -41.93938446044922, 429.36627197265625, -140.90087890625, -165.62030029296875, 642.5993041992188, 97.45486450195312, 355.7160339355469, 436.17919921875, 237.25917053222656, 85.02008056640625, 881.2291259765625, 104.81158447265625, 463.6533508300781, 88.40756225585938, 166.6356964111328, -209.1509246826172, 208.2108917236328, 63.40728759765625, 1621.417236328125, -216.5934600830078, -1013.6762084960938, 397.7105712890625, 626.2470703125, 119.63850402832031, -425.43780517578125, -68.25126647949219, -560.2987060546875, -74.90464782714844, 69.08019256591797, -510.2503662109375, 2064.737060546875, 1059.551025390625, 279.39483642578125, 577.7482299804688, -149.5766143798828, 1339.676513671875, -376.26666259765625, -593.7366943359375, 143.50900268554688, 200.0680389404297, 139.20346069335938, 1129.482421875, 975.1339721679688, 486.83782958984375, 288.465576171875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000646.npy"}
|
|
{"epoch": 0.9486049926578561, "step": 647, "batch_size": 64, "mean": 382.257080078125, "std": 538.4498901367188, "min": -1129.6923828125, "p10": -168.6048126220703, "median": 375.04701232910156, "p90": 1059.4821044921875, "max": 1976.7203369140625, "pos_frac": 0.78125, "sample": [789.3316650390625, 601.5383911132812, 308.6999816894531, 247.7792510986328, 305.3194274902344, 1131.8878173828125, 259.0339050292969, 407.5088806152344, 542.62646484375, 595.0865478515625, 1541.20703125, 134.72207641601562, 43.802154541015625, 1771.784912109375, 682.0543212890625, 517.3172607421875, -68.62379455566406, 1051.2813720703125, 650.3433227539062, -170.8995819091797, 216.28952026367188, 198.14849853515625, -44.60499572753906, -163.25035095214844, 564.7417602539062, 431.64752197265625, -74.27580261230469, 1976.7203369140625, -840.8810424804688, 843.2601318359375, -181.73480224609375, -221.9837646484375, 116.13113403320312, 791.8502807617188, 1054.775390625, 744.5596923828125, 472.71112060546875, 371.87847900390625, -537.4277954101562, 216.16111755371094, 531.6823120117188, 95.05278778076172, 1146.6878662109375, -145.408203125, -98.91275024414062, 433.4908447265625, 423.76861572265625, 404.1878662109375, -36.528526306152344, 1061.499267578125, 192.13340759277344, 94.4824447631836, 1159.258056640625, 279.4815673828125, 30.535507202148438, 532.863037109375, -1129.6923828125, 744.52001953125, -278.3913269042969, 337.21697998046875, 506.4195861816406, 391.86639404296875, 378.2155456542969, 133.5057373046875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000647.npy"}
|
|
{"epoch": 0.9500734214390602, "step": 648, "batch_size": 64, "mean": 572.3054809570312, "std": 779.3780517578125, "min": -1028.548828125, "p10": -184.86049499511716, "median": 505.5013732910156, "p90": 1480.1082153320315, "max": 3475.093994140625, "pos_frac": 0.78125, "sample": [502.9968566894531, 283.010009765625, 154.3126220703125, 540.6661987304688, 298.9195251464844, 694.0574340820312, 1993.05078125, 1508.58349609375, 838.4375, 1412.24853515625, 53.778228759765625, -720.0283203125, 53.401031494140625, 908.1950073242188, 263.149169921875, -135.52845764160156, 1323.9124755859375, 497.26275634765625, 18.442245483398438, -60.97802734375, -23.270734786987305, 515.1357421875, -106.59577178955078, 884.1903076171875, -229.02793884277344, 1887.1505126953125, 561.1665649414062, 116.03932189941406, -23.161026000976562, -170.18115234375, 805.9617919921875, 867.161865234375, -191.15164184570312, 3475.093994140625, 654.4171142578125, 569.9282836914062, -1028.548828125, 575.5435791015625, 35.00286865234375, -665.7108154296875, -246.00250244140625, 316.8125, 2308.181884765625, 734.796630859375, 667.6630859375, 403.4342346191406, 1413.6658935546875, -86.5208740234375, 580.9730834960938, 668.5239868164062, 1078.4990234375, 372.34173583984375, 341.2449645996094, 628.9539794921875, 1314.1265869140625, 104.0240707397461, 508.0058898925781, 751.635986328125, 425.7130126953125, 2328.87255859375, 1386.7301025390625, 228.0634765625, -420.8195495605469, 1881.597900390625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000648.npy"}
|
|
{"epoch": 0.9515418502202643, "step": 649, "batch_size": 64, "mean": 264.7760009765625, "std": 660.9012451171875, "min": -908.830810546875, "p10": -461.84951477050777, "median": 204.0474624633789, "p90": 1008.770135498047, "max": 3133.153076171875, "pos_frac": 0.609375, "sample": [510.12335205078125, 13.395942687988281, 459.3455810546875, -108.08938598632812, 502.7803039550781, -349.6244812011719, -10.63491439819336, 198.17913818359375, 761.92724609375, 310.98175048828125, 532.9247436523438, -147.8111572265625, -471.8529968261719, 56.86106872558594, -661.3300170898438, -130.16897583007812, -44.64701843261719, 440.5649108886719, -328.4031066894531, 246.7823486328125, 673.403564453125, 172.4519500732422, -511.2366027832031, -180.49212646484375, 970.7756958007812, -782.7811889648438, 683.8201293945312, 866.9270629882812, -131.27833557128906, 598.0294189453125, 360.8858947753906, -382.7571105957031, 82.46917724609375, -112.50093841552734, 805.5933837890625, 554.584716796875, 857.5703125, 377.23480224609375, 867.4490356445312, -426.4493408203125, 1187.10986328125, 441.3556823730469, 5.298259735107422, 1161.26953125, 874.1651611328125, -225.19235229492188, 1357.1810302734375, 3133.153076171875, 410.29583740234375, 377.5641174316406, -99.21170806884766, 209.91578674316406, 10.538223266601562, 848.4255981445312, -378.35699462890625, 1238.0567626953125, 1125.943115234375, -474.0793762207031, -19.737903594970703, -178.01910400390625, -908.830810546875, -862.728271484375, 1025.053466796875, -438.508056640625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000649.npy"}
|
|
{"epoch": 0.9530102790014684, "step": 650, "batch_size": 64, "mean": 421.4519348144531, "std": 729.4365234375, "min": -1144.8297119140625, "p10": -459.9873504638672, "median": 304.30992126464844, "p90": 1298.3082153320313, "max": 2469.744873046875, "pos_frac": 0.765625, "sample": [55.68177032470703, -558.3043823242188, 808.0812377929688, 816.9378662109375, 177.21261596679688, 931.2064208984375, 302.519775390625, -1068.714599609375, 2259.47509765625, 594.4686279296875, 1171.1292724609375, -763.5836181640625, 1365.939697265625, 982.2311401367188, 938.7184448242188, 1282.097900390625, 290.26031494140625, 1680.7132568359375, 1153.8260498046875, 2469.744873046875, 795.1802368164062, -133.4947967529297, 1694.0960693359375, 589.2399291992188, 264.07244873046875, 41.37092208862305, -149.72625732421875, 459.9504089355469, 195.10488891601562, 437.3715515136719, 724.7384033203125, -169.62989807128906, -324.2664794921875, -917.2987060546875, -452.73431396484375, 279.3492736816406, -25.331340789794922, 1816.005615234375, -99.35533142089844, 48.73822021484375, 551.3616333007812, 189.88259887695312, -463.0957946777344, 663.5734252929688, 554.2548217773438, 520.931640625, 85.48777770996094, 830.4189453125, 156.64129638671875, 1305.2554931640625, 306.1000671386719, 299.3114929199219, 595.1578369140625, 976.6513671875, 877.44287109375, 61.59160614013672, 474.42193603515625, -691.1246948242188, -1144.8297119140625, 245.050537109375, 717.7197265625, 77.49213409423828, -384.7240905761719, 204.92886352539062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000650.npy"}
|
|
{"epoch": 0.9544787077826725, "step": 651, "batch_size": 64, "mean": 542.8966064453125, "std": 595.1066284179688, "min": -402.380615234375, "p10": -23.079699134826658, "median": 411.27577209472656, "p90": 1317.7631469726564, "max": 2236.120849609375, "pos_frac": 0.859375, "sample": [868.7456665039062, 684.3140258789062, 2236.120849609375, 210.61505126953125, -402.380615234375, -5.96563720703125, 365.6160583496094, 612.9776611328125, 38.18241882324219, 152.3362579345703, 1287.7567138671875, 2165.19580078125, 319.494140625, -222.08148193359375, 111.5654296875, -45.300498962402344, 75.01083374023438, 522.058837890625, -301.8362121582031, 868.3485107421875, 1603.4664306640625, 330.473876953125, 843.8733520507812, 280.6153564453125, 724.1307983398438, 56.44459533691406, -22.622800827026367, -23.2755126953125, 154.6485595703125, 711.8936767578125, 194.88401794433594, 33.37525177001953, 1051.43701171875, 153.48202514648438, 1214.5753173828125, 971.33251953125, 577.71337890625, 36.352569580078125, 760.62109375, 3.924182891845703, 443.6881408691406, 1097.2213134765625, -366.6716003417969, 500.6736145019531, 698.34228515625, 672.6025390625, 25.453125, 946.9321899414062, 122.50096130371094, 606.3108520507812, 164.33682250976562, 386.0636291503906, 1330.623046875, 1786.3956298828125, 349.9613037109375, -370.38189697265625, 1094.842041015625, 388.615966796875, 1681.8912353515625, 433.9355773925781, 1110.1519775390625, 1454.5, 876.2086181640625, 113.06413269042969], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000651.npy"}
|
|
{"epoch": 0.9559471365638766, "step": 652, "batch_size": 64, "mean": 413.9796142578125, "std": 687.5950927734375, "min": -1476.0555419921875, "p10": -304.3278289794921, "median": 307.14662170410156, "p90": 1287.2681152343753, "max": 2406.256591796875, "pos_frac": 0.765625, "sample": [376.94134521484375, 141.31277465820312, 763.4293823242188, -983.0536499023438, 598.7353515625, 459.6040344238281, 227.3008270263672, -521.5259399414062, 746.9813232421875, 164.8402099609375, 1324.7388916015625, 1423.0426025390625, 1199.8363037109375, -1476.0555419921875, 72.88848876953125, 592.4815673828125, 54.670867919921875, 816.2744140625, 1421.69677734375, -404.33642578125, 1159.3974609375, 302.6325988769531, 88.4432601928711, -150.40216064453125, 37.370155334472656, -4.86346435546875, 932.7701416015625, -112.84825134277344, 344.62615966796875, 65.15448760986328, 41.54924392700195, 2406.256591796875, -342.8028259277344, 2380.5732421875, 312.9744567871094, 1554.1419677734375, 47.02116394042969, 88.76190185546875, -25.192319869995117, 30.969635009765625, 476.40313720703125, -43.54193878173828, 203.39675903320312, -133.55935668945312, 964.2344970703125, 878.484130859375, 882.7322998046875, -230.28994750976562, 630.490966796875, 804.899658203125, 1737.1329345703125, 74.40669250488281, 872.7713012695312, 257.6728210449219, -542.77099609375, 428.0392150878906, 1146.87939453125, -336.058349609375, 389.06207275390625, 764.8616943359375, 247.53370666503906, 577.3750610351562, 311.66064453125, -23.459434509277344], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000652.npy"}
|
|
{"epoch": 0.9574155653450808, "step": 653, "batch_size": 64, "mean": 372.2716369628906, "std": 656.0138549804688, "min": -1172.58349609375, "p10": -359.26174621582027, "median": 263.69715118408203, "p90": 1220.8676513671876, "max": 2155.76220703125, "pos_frac": 0.703125, "sample": [876.41748046875, -0.4653167724609375, -135.93421936035156, 1853.749755859375, 247.02552795410156, -395.2382507324219, 109.47486877441406, -508.20166015625, 45.9444580078125, 836.7427368164062, 545.181640625, -263.3050537109375, -68.46923065185547, 1301.9490966796875, 845.800048828125, 1223.1474609375, 833.9996337890625, 1215.548095703125, 1204.5552978515625, -89.45819091796875, 185.85128784179688, 280.3687744140625, 1670.99609375, 424.8192443847656, 532.4805297851562, 715.7302856445312, 1074.3453369140625, 297.80194091796875, -19.733726501464844, -452.0826721191406, 1362.251953125, 244.0735626220703, 603.4237670898438, -1172.58349609375, -245.36764526367188, -111.40431213378906, 423.26837158203125, 481.6846008300781, -663.5184936523438, 2124.272705078125, 2155.76220703125, 577.5957641601562, 12.988157272338867, -44.087562561035156, 618.97265625, 524.2274169921875, 19.203338623046875, -285.7615966796875, 227.05853271484375, 231.90536499023438, 525.258544921875, 141.14199829101562, 783.7711791992188, 180.11459350585938, 470.8638916015625, -730.5678100585938, -390.7618103027344, 323.14306640625, 714.6455688476562, 86.07734680175781, 461.5754699707031, -228.18153381347656, 234.0517578125, -218.75413513183594], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000653.npy"}
|
|
{"epoch": 0.9588839941262849, "step": 654, "batch_size": 64, "mean": 359.82257080078125, "std": 593.821044921875, "min": -592.7657470703125, "p10": -329.3725982666016, "median": 294.88124084472656, "p90": 1265.577490234375, "max": 1844.3818359375, "pos_frac": 0.671875, "sample": [24.304283142089844, 337.9696044921875, -196.22552490234375, 167.2120361328125, 411.59832763671875, -46.49015808105469, -330.2413330078125, 411.93017578125, -49.34403610229492, 734.1141357421875, 767.14892578125, -327.3455505371094, 92.67781829833984, 424.4267578125, -479.85321044921875, -237.06536865234375, -73.23821258544922, 1844.3818359375, 1002.0714721679688, 552.2706298828125, 547.693603515625, 1653.1002197265625, 500.49139404296875, 305.1964111328125, 1362.510009765625, 602.0093383789062, -104.94047546386719, 667.7714233398438, 133.78036499023438, -108.59466552734375, -150.4820098876953, 1177.3243408203125, 405.83013916015625, 65.30038452148438, 32.383880615234375, -13.147697448730469, 875.25927734375, 197.99057006835938, 284.5660705566406, 403.8204040527344, 492.8372802734375, 706.4310913085938, 710.2256469726562, -233.14382934570312, -592.7657470703125, 853.4788208007812, 310.89923095703125, 1652.282470703125, 1246.57666015625, -246.82630920410156, 23.18035888671875, 1410.265869140625, 580.0844116210938, 270.2342834472656, -60.65392303466797, -400.50201416015625, 571.474609375, -369.3717041015625, 1814.9443359375, 1273.720703125, 77.71139526367188, -356.27569580078125, -391.11199951171875, -183.21551513671875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000654.npy"}
|
|
{"epoch": 0.960352422907489, "step": 655, "batch_size": 64, "mean": 377.6003723144531, "std": 614.0530395507812, "min": -756.1159057617188, "p10": -313.75294189453126, "median": 279.36878967285156, "p90": 1177.7396484375001, "max": 2197.7119140625, "pos_frac": 0.703125, "sample": [-314.46588134765625, -756.1159057617188, 1303.354736328125, 145.83291625976562, 84.55945587158203, 1218.6226806640625, 630.1790771484375, 887.7733154296875, 374.1467590332031, -227.81341552734375, 791.1677856445312, 52.64292907714844, -145.89581298828125, -549.40771484375, -27.790624618530273, 1191.428466796875, -174.05026245117188, 849.53759765625, -247.8730926513672, 1049.248779296875, -367.095703125, 413.1106872558594, 890.3428955078125, -619.6116943359375, -312.08941650390625, 585.6138916015625, 312.6670837402344, 83.45645141601562, -98.39878845214844, 259.9561767578125, -76.8446273803711, 1612.9022216796875, 197.01063537597656, 273.9009094238281, 592.691650390625, 847.8341674804688, -629.1616821289062, 756.1807861328125, 631.5032958984375, 1039.9708251953125, 347.4626159667969, 73.21195220947266, 587.300048828125, 2197.7119140625, 1588.2838134765625, -107.0450439453125, 360.9333801269531, -510.0290832519531, 1039.105224609375, 508.31689453125, -121.76786804199219, 92.756591796875, 24.36505126953125, 220.09971618652344, 1145.799072265625, 284.836669921875, 1587.9156494140625, -193.0015411376953, -86.12800598144531, 554.9601440429688, 741.1234741210938, 227.6324920654297, 158.92706298828125, 914.6333618164062], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000655.npy"}
|
|
{"epoch": 0.9618208516886931, "step": 656, "batch_size": 64, "mean": 647.600830078125, "std": 710.2678833007812, "min": -492.44647216796875, "p10": -81.57827377319336, "median": 440.85150146484375, "p90": 1740.7328247070313, "max": 2639.280029296875, "pos_frac": 0.828125, "sample": [1500.8692626953125, 804.521728515625, -492.44647216796875, -38.90004348754883, 124.54914093017578, 244.2481689453125, 356.49530029296875, 574.1377563476562, 278.9208679199219, 768.6184692382812, 895.41943359375, 766.2289428710938, 304.48602294921875, 129.71975708007812, 478.3598327636719, 1691.42529296875, -147.81939697265625, 83.18978881835938, 1761.8646240234375, 170.5831298828125, 845.6357421875, -22.2315673828125, 2101.3623046875, -157.34307861328125, -115.6038818359375, 264.10113525390625, 951.3740234375, 1017.0642700195312, 449.0430908203125, 1104.761962890625, 397.845458984375, 2639.280029296875, 706.0693359375, -80.54698181152344, 2317.357177734375, 849.2672729492188, 432.659912109375, 1096.5533447265625, 2400.112060546875, 1.983367919921875, 2083.12255859375, -172.58311462402344, 1305.8323974609375, 52.138427734375, 1097.218505859375, 991.2721557617188, 341.65802001953125, 2129.10400390625, 308.2835693359375, 190.06973266601562, 759.4057006835938, 118.11515808105469, -127.43204498291016, 311.17547607421875, 925.933837890625, 146.15969848632812, -21.692642211914062, 279.95068359375, -82.02025604248047, 1062.6339111328125, 851.7325439453125, 37.34796905517578, 654.6924438476562, 751.11865234375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000656.npy"}
|
|
{"epoch": 0.9632892804698973, "step": 657, "batch_size": 64, "mean": 519.62646484375, "std": 690.0695190429688, "min": -1566.7039794921875, "p10": -180.05298004150382, "median": 454.6181945800781, "p90": 1328.7445556640625, "max": 2481.67431640625, "pos_frac": 0.84375, "sample": [1161.83349609375, 29.56366729736328, 852.8792114257812, 491.48358154296875, 1490.2408447265625, 1544.71728515625, 690.0067749023438, 128.6355438232422, 1300.7529296875, -105.997314453125, -321.00018310546875, 464.42724609375, 1340.740966796875, -211.79112243652344, 253.5760955810547, 501.16534423828125, 61.59025573730469, 387.9606628417969, 128.3812255859375, 1625.538330078125, 2274.475830078125, 442.50250244140625, 69.35417938232422, 2481.67431640625, 1254.9295654296875, 1183.504150390625, 251.61500549316406, 74.61163330078125, 737.4156494140625, 632.7320556640625, 850.7255249023438, 486.89996337890625, 34.012733459472656, -1566.7039794921875, 573.8799438476562, 489.3936767578125, 268.06695556640625, -262.54876708984375, -6.82476806640625, 315.41314697265625, 614.8302001953125, -862.8682861328125, -277.31573486328125, 578.3494262695312, 859.9892578125, 57.544151306152344, 252.9574432373047, 613.4559936523438, 437.502685546875, 18.19416046142578, 720.01416015625, 763.8618774414062, 213.5899200439453, 835.9649658203125, 444.80914306640625, 351.3070373535156, 1256.2091064453125, 2437.70751953125, 419.4740905761719, -276.0984802246094, 274.2052917480469, -32.80952453613281, 628.9171142578125, 526.4705200195312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000657.npy"}
|
|
{"epoch": 0.9647577092511013, "step": 658, "batch_size": 64, "mean": 418.6898193359375, "std": 605.3524780273438, "min": -1120.515380859375, "p10": -363.9357055664062, "median": 425.72630310058594, "p90": 1244.3431152343753, "max": 1948.28759765625, "pos_frac": 0.75, "sample": [732.5283813476562, 820.34765625, 744.5323486328125, -301.688720703125, 490.8882141113281, -254.25408935546875, 1171.1846923828125, 268.328125, -378.400634765625, 440.01385498046875, 430.005126953125, 1400.020751953125, 85.01170349121094, 237.66366577148438, 289.44232177734375, -512.8408203125, 421.4474792480469, 353.5807800292969, 93.2510757446289, -82.66596984863281, 624.754638671875, 147.1123046875, 459.81085205078125, 154.36056518554688, -1120.515380859375, 919.2032470703125, -355.4351501464844, 592.6397705078125, 573.6879272460938, -153.06649780273438, -125.19688415527344, 293.66375732421875, -57.34207534790039, 404.75634765625, 280.201416015625, 1539.7550048828125, -56.80762481689453, 1130.8048095703125, 666.869384765625, 1144.4637451171875, 20.770099639892578, 410.7756042480469, 678.7032470703125, 1497.67431640625, 1273.6573486328125, 648.0431518554688, -444.9978332519531, 534.5177612304688, 860.4110717773438, 856.631103515625, -477.7371826171875, 1358.1143798828125, -363.82086181640625, 1948.28759765625, 712.0125122070312, 1175.9432373046875, 634.503173828125, 1418.7822265625, -363.98492431640625, 699.0048828125, 139.8687286376953, 504.4674987792969, 202.464111328125, -640.0594482421875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000658.npy"}
|
|
{"epoch": 0.9662261380323054, "step": 659, "batch_size": 64, "mean": 402.4836120605469, "std": 595.201171875, "min": -889.4042358398438, "p10": -280.9352325439453, "median": 370.58692932128906, "p90": 1110.7233764648438, "max": 2501.0693359375, "pos_frac": 0.734375, "sample": [609.943603515625, -63.885955810546875, 215.304443359375, 815.6600341796875, -261.4331359863281, -289.29327392578125, 1211.8475341796875, -395.74676513671875, -504.4663391113281, -143.7119903564453, 1063.1451416015625, 385.6800537109375, 569.017578125, -150.8011932373047, 123.40211486816406, 156.48089599609375, 602.3291625976562, 166.49710083007812, 241.5020751953125, 197.46194458007812, 352.3764953613281, 566.2406005859375, -484.3272399902344, 2501.0693359375, -889.4042358398438, 1332.700927734375, 367.0505676269531, 659.916259765625, 1120.587158203125, 326.61785888671875, 406.2739562988281, 490.6435852050781, 0.190948486328125, -64.80232238769531, 678.9066162109375, 347.024658203125, 82.26860046386719, -19.067501068115234, 60.735198974609375, 1141.6959228515625, -550.9495849609375, -43.20232391357422, 660.6723022460938, -154.0517120361328, 760.9125366210938, -42.208377838134766, -27.87834930419922, 680.590087890625, 1717.472412109375, 1905.730224609375, 876.5530395507812, 232.01734924316406, 810.7420654296875, 499.8519592285156, 535.54638671875, 374.123291015625, 903.2140502929688, 1087.7078857421875, 542.24560546875, -332.7541809082031, 785.0807495117188, 81.94496154785156, 467.5821533203125, 462.37939453125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000659.npy"}
|
|
{"epoch": 0.9676945668135095, "step": 660, "batch_size": 64, "mean": 525.3436279296875, "std": 653.158203125, "min": -1113.2904052734375, "p10": -231.09403076171873, "median": 500.4145965576172, "p90": 1461.6256958007818, "max": 2336.880126953125, "pos_frac": 0.765625, "sample": [-142.36973571777344, 228.3193359375, 1129.8052978515625, 509.6623229980469, 455.269775390625, 500.193115234375, 634.2451171875, 1521.7564697265625, 1086.62548828125, 821.7816772460938, 310.818359375, -331.54638671875, 1231.32470703125, 508.54559326171875, 926.3031005859375, 1705.068115234375, -199.5081329345703, -26.437088012695312, -262.95562744140625, -95.54757690429688, 1115.8438720703125, 1522.9049072265625, 51.40656280517578, 500.6360778808594, 805.8099975585938, 1944.558837890625, 381.0941162109375, -189.79246520996094, 783.6351318359375, 448.3289794921875, 214.54934692382812, 1321.320556640625, -469.5140380859375, 2336.880126953125, 351.6147155761719, 626.5858764648438, 784.3666381835938, 7.8192901611328125, 392.9866638183594, 820.7202758789062, 149.00193786621094, 203.00732421875, 657.0288696289062, 815.0504150390625, -153.03668212890625, -57.838104248046875, 164.43157958984375, 1638.29248046875, 910.0960083007812, 1131.2828369140625, -244.63084411621094, -0.6828994750976562, -568.8862915039062, 676.525634765625, 1014.55419921875, 563.4973754882812, 140.14187622070312, 319.2079772949219, 591.156982421875, 1837.8868408203125, -247.68408203125, -1113.2904052734375, 419.1101989746094, 514.656494140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000660.npy"}
|
|
{"epoch": 0.9691629955947136, "step": 661, "batch_size": 64, "mean": 465.8528137207031, "std": 661.1007080078125, "min": -806.78857421875, "p10": -113.97747268676753, "median": 391.78802490234375, "p90": 1149.876892089845, "max": 3360.220458984375, "pos_frac": 0.8125, "sample": [162.77122497558594, -68.1358413696289, 818.8956298828125, -12.292924880981445, 407.2716064453125, 236.46507263183594, 512.14892578125, -172.34945678710938, 686.2684326171875, 356.85870361328125, 1393.1763916015625, 401.154296875, 690.7369995117188, 599.38720703125, 3360.220458984375, 869.552734375, 163.53781127929688, 36.2469482421875, -806.78857421875, 143.44276428222656, -27.104839324951172, 807.7804565429688, 161.91848754882812, 408.2455749511719, 209.897705078125, 234.8673858642578, 323.05889892578125, 895.97265625, 1315.66064453125, 57.66717529296875, 367.88226318359375, 691.99658203125, 537.517822265625, 1258.6929931640625, 501.1600646972656, 339.1458740234375, 207.6007080078125, 344.4885559082031, 238.94424438476562, 451.55816650390625, -520.1796264648438, 382.4217529296875, -133.62388610839844, 818.0401000976562, 759.2701416015625, -675.5877685546875, 743.263916015625, 677.346923828125, 42.04106903076172, 735.41259765625, 2769.395751953125, -671.5199584960938, 297.2652587890625, -2.4284229278564453, 752.4703369140625, 1468.397705078125, 118.37065887451172, 612.6241455078125, -219.36358642578125, 1323.5006103515625, 483.4501953125, 516.7893676757812, 432.1614990234375, -0.45738983154296875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000661.npy"}
|
|
{"epoch": 0.9706314243759178, "step": 662, "batch_size": 64, "mean": 366.068359375, "std": 672.4151000976562, "min": -1134.043701171875, "p10": -433.985546875, "median": 388.0619201660156, "p90": 1119.0428222656253, "max": 2386.697509765625, "pos_frac": 0.71875, "sample": [37.404762268066406, 810.7509765625, 1154.850341796875, -1134.043701171875, 609.3429565429688, -433.87933349609375, 456.5693359375, 874.48876953125, 1830.900634765625, 27.181678771972656, 862.9450073242188, -434.03106689453125, -1081.177001953125, -200.3328399658203, 1452.6690673828125, 72.48168182373047, -19.423593521118164, 324.9965515136719, 2386.697509765625, 811.4171752929688, 1242.976318359375, -173.5159912109375, 392.2413330078125, -178.31683349609375, 185.31399536132812, 856.116455078125, 126.58085632324219, 992.426025390625, 428.4107971191406, 211.57952880859375, 308.82171630859375, -173.41717529296875, 401.1600341796875, -208.71636962890625, 568.6658325195312, 1035.491943359375, 583.774658203125, 383.88250732421875, 674.5719604492188, 665.3795166015625, 35.79814147949219, 111.03547668457031, 846.21630859375, 1915.4981689453125, 778.1521606445312, 1271.099853515625, -766.6363525390625, 462.443115234375, 1006.0492553710938, 338.72015380859375, -589.453369140625, 67.8302001953125, -97.06517028808594, 552.78369140625, -545.7623291015625, -233.49710083007812, 559.5217895507812, 566.7042236328125, 362.6252746582031, 592.4962158203125, 632.6000366210938, -747.2924194335938, -332.2283935546875, -92.49946594238281], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000662.npy"}
|
|
{"epoch": 0.9720998531571219, "step": 663, "batch_size": 64, "mean": 433.2557373046875, "std": 737.7041015625, "min": -1685.197509765625, "p10": -215.95414581298823, "median": 291.51329040527344, "p90": 1184.59794921875, "max": 3112.15087890625, "pos_frac": 0.75, "sample": [834.301025390625, 333.255126953125, 828.6139526367188, 43.39265441894531, 357.2908935546875, 1069.7467041015625, 217.46897888183594, 258.4734191894531, -60.03662109375, 412.4274597167969, 117.75676727294922, 154.00714111328125, 1523.943359375, 842.69921875, 64.02555847167969, 891.4303588867188, 692.3046875, 77.18970489501953, -1685.197509765625, 176.51995849609375, 372.01031494140625, 617.6212768554688, -242.55120849609375, 717.1488647460938, 696.568115234375, 380.46588134765625, 595.028564453125, 175.39663696289062, 455.409423828125, 3112.15087890625, -382.73968505859375, 1147.1094970703125, 1672.810791015625, -8.743881225585938, 610.2477416992188, 2244.854248046875, 1615.9678955078125, 114.84428405761719, -4.827848434448242, 1832.4339599609375, 1081.554931640625, -372.0769348144531, 84.366943359375, 324.55316162109375, -116.58294677734375, 562.4750366210938, 1143.718017578125, 156.48207092285156, 159.14718627929688, 1070.382568359375, 174.46141052246094, -25.971412658691406, 1200.6644287109375, -1115.621337890625, 946.5753173828125, -27.236469268798828, -518.997314453125, -153.8943328857422, 92.73822021484375, 677.0507202148438, -484.18017578125, -89.22595977783203, -83.09671020507812, 170.26446533203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000663.npy"}
|
|
{"epoch": 0.973568281938326, "step": 664, "batch_size": 64, "mean": 498.7710266113281, "std": 679.9046020507812, "min": -832.9625854492188, "p10": -190.6662628173828, "median": 428.28868103027344, "p90": 1297.7966918945315, "max": 2371.5048828125, "pos_frac": 0.75, "sample": [1117.899169921875, 38.604949951171875, 582.326904296875, 688.5014038085938, 867.0596923828125, 14.798484802246094, 306.5361633300781, 2012.0172119140625, -166.17019653320312, 1495.677001953125, -437.248779296875, 766.913330078125, 459.49755859375, 289.4939270019531, 601.9948120117188, 240.2811737060547, -726.7860107421875, 487.626708984375, -54.285423278808594, 537.98291015625, 1260.02685546875, 1313.9837646484375, 61.4161376953125, 1171.17138671875, -226.06204223632812, 175.90951538085938, 2021.60888671875, -200.81820678710938, 523.82666015625, -472.3240661621094, 1169.8836669921875, 120.77902221679688, -37.68414306640625, 1151.30517578125, 533.7971801757812, 399.3593444824219, -832.9625854492188, 426.9016418457031, 707.2197265625, 1158.084228515625, 99.61964416503906, 559.88134765625, 774.3766479492188, -166.9783935546875, 1006.9566040039062, 2292.681640625, 837.5374755859375, 242.7294921875, 2371.5048828125, -162.97430419921875, -148.2977294921875, 1343.597412109375, 1154.06201171875, 231.2456512451172, 249.33419799804688, 541.670166015625, 173.38931274414062, 304.1992492675781, -126.03211975097656, -93.82382202148438, 429.67572021484375, -16.982635498046875, 876.8949584960938, -401.06427001953125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000664.npy"}
|
|
{"epoch": 0.9750367107195301, "step": 665, "batch_size": 64, "mean": 413.22760009765625, "std": 532.580078125, "min": -677.846923828125, "p10": -163.00472640991208, "median": 356.2386474609375, "p90": 1072.6174316406255, "max": 1917.069580078125, "pos_frac": 0.796875, "sample": [-179.24978637695312, 7.847499847412109, 435.0376281738281, 326.3917541503906, -125.0995864868164, 467.73150634765625, 835.5186767578125, -72.05072021484375, 1506.523681640625, 452.78424072265625, 334.6875915527344, 560.227294921875, 150.76730346679688, -599.2620239257812, 659.3967895507812, 5.936616897583008, -19.4991455078125, 190.62698364257812, 13.484634399414062, 565.5139770507812, 701.5594482421875, 138.03077697753906, 475.91265869140625, 359.3812561035156, -29.401105880737305, 267.1313171386719, -677.846923828125, 1384.493408203125, 768.9273681640625, -27.00156021118164, 718.2984619140625, 369.2063293457031, 693.760498046875, 410.2970886230469, 1131.80224609375, 177.47296142578125, 1173.316650390625, 550.8421630859375, 353.0960388183594, 252.17637634277344, 270.0343322753906, 1791.08740234375, 487.3249816894531, -367.93231201171875, 549.7288208007812, 875.4979248046875, 335.87066650390625, -394.36224365234375, 484.6865234375, 934.51953125, 99.15875244140625, 172.85069274902344, 902.7578735351562, 212.28707885742188, 1744.757080078125, -241.0749053955078, 704.0020751953125, -233.54263305664062, 236.62060546875, 881.2057495117188, 1917.069580078125, 73.87843322753906, 417.1480712890625, -115.77664184570312], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000665.npy"}
|
|
{"epoch": 0.9765051395007343, "step": 666, "batch_size": 64, "mean": 422.8899230957031, "std": 609.143798828125, "min": -1034.518310546875, "p10": -308.56541748046874, "median": 345.4820251464844, "p90": 1065.6788696289066, "max": 2593.01220703125, "pos_frac": 0.765625, "sample": [775.6533203125, 418.56365966796875, 697.0806274414062, 437.3017272949219, -444.9573974609375, 699.5975341796875, 1144.1810302734375, 1010.9683837890625, 153.15957641601562, 1006.2235107421875, -37.08326721191406, 260.1238708496094, 393.9250183105469, -473.45263671875, -321.80401611328125, 275.085205078125, 283.1988525390625, -221.38629150390625, -52.76092529296875, -72.22357177734375, 1417.5543212890625, 8.756904602050781, 749.498291015625, 515.2109375, 2093.678466796875, -43.61775207519531, 790.9752197265625, 319.6485595703125, -113.49686431884766, -684.5401000976562, 322.9299621582031, 670.7093505859375, 369.71734619140625, 1089.126220703125, 915.83740234375, 428.6846923828125, 877.5343017578125, -365.6817321777344, 345.5111083984375, 463.7142639160156, 64.48275756835938, 794.76025390625, -277.67535400390625, 1297.70849609375, 492.4940185546875, 224.0245361328125, -1034.518310546875, 1006.166015625, 374.4557189941406, 92.40243530273438, 345.45294189453125, 728.0211791992188, 977.8480224609375, -13.111812591552734, 155.28131103515625, 176.42617797851562, 257.16900634765625, 336.0851745605469, 2593.01220703125, 217.0197296142578, -389.6885681152344, 895.3175659179688, 321.2774963378906, 1327.3997802734375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000666.npy"}
|
|
{"epoch": 0.9779735682819384, "step": 667, "batch_size": 64, "mean": 500.7984313964844, "std": 651.6002197265625, "min": -723.3258056640625, "p10": -286.63959960937495, "median": 467.1950988769531, "p90": 1241.1739746093754, "max": 2263.717529296875, "pos_frac": 0.78125, "sample": [430.1700134277344, -539.2333374023438, 679.6380615234375, -699.628662109375, -69.12619018554688, 975.6906127929688, 1009.771484375, 576.1102294921875, 558.2239379882812, 1279.5003662109375, 256.11767578125, 98.81576538085938, 106.1041030883789, 483.6962890625, 298.54327392578125, -723.3258056640625, 720.4022216796875, 582.46044921875, 1740.6884765625, 902.9332885742188, 796.1853637695312, 221.91024780273438, 1151.7457275390625, 121.13065338134766, 586.9299926757812, 446.23907470703125, 1013.1357421875, -104.82848358154297, 1058.6290283203125, 760.6329956054688, 858.501953125, 640.8128051757812, 923.6598510742188, 671.6336669921875, 354.3490905761719, -267.0516052246094, 385.03887939453125, 450.69390869140625, 369.0933532714844, -41.68457794189453, -85.88975524902344, 1579.275634765625, 1848.2384033203125, 1.8789443969726562, 741.6116943359375, 1510.9017333984375, 661.2941284179688, 236.24972534179688, 166.5206298828125, 749.27587890625, -2.1271133422851562, 882.8641357421875, -418.7113037109375, 205.78309631347656, 2263.717529296875, 2195.3623046875, 80.23662567138672, 1025.251953125, -657.5350952148438, 359.5194396972656, -701.2521362304688, -295.0344543457031, -57.751007080078125, 697.1080932617188], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000667.npy"}
|
|
{"epoch": 0.9794419970631424, "step": 668, "batch_size": 64, "mean": 340.3535461425781, "std": 753.9414672851562, "min": -1319.647705078125, "p10": -508.7701965332031, "median": 289.470458984375, "p90": 1208.374841308594, "max": 2700.5869140625, "pos_frac": 0.703125, "sample": [507.1441955566406, 281.54974365234375, 1343.2490234375, -1218.08740234375, 209.0114288330078, 875.5473022460938, 1392.456298828125, 281.9114990234375, 1072.53759765625, 480.01904296875, -650.4995727539062, 657.4541625976562, 43.94194793701172, 86.03765869140625, 400.55279541015625, 1065.9527587890625, 211.2790069580078, 80.91395568847656, -477.5869140625, -549.7451782226562, -1018.1416015625, 1040.379150390625, -470.332275390625, 914.5549926757812, -143.12338256835938, 649.4835815429688, 351.6813049316406, 436.7830505371094, 38.70695495605469, 674.6400756835938, 320.27294921875, 645.2105102539062, 1186.486083984375, -457.9447937011719, 974.5076293945312, 404.06488037109375, -917.4804077148438, 1030.3824462890625, 112.27350616455078, -231.21798706054688, 146.07464599609375, -73.22016143798828, 1397.879638671875, -84.29747772216797, -522.1344604492188, 1134.8077392578125, -93.80889892578125, -424.31878662109375, 336.9375915527344, -42.612831115722656, 382.05755615234375, 204.9959716796875, 297.0294189453125, 272.1805725097656, -212.77383422851562, 1217.7557373046875, 390.7718200683594, 344.46612548828125, 2295.02783203125, -1319.647705078125, -73.0693588256836, 1815.113037109375, 58.0006103515625, 2700.5869140625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000668.npy"}
|
|
{"epoch": 0.9809104258443465, "step": 669, "batch_size": 64, "mean": 482.85858154296875, "std": 653.193603515625, "min": -1325.0335693359375, "p10": -215.3625427246093, "median": 414.95762634277344, "p90": 1405.1581787109378, "max": 2485.71533203125, "pos_frac": 0.8125, "sample": [176.34275817871094, 54.5469970703125, -69.8462142944336, 1008.4764404296875, -875.6448364257812, 286.45318603515625, 626.2324829101562, 649.8972778320312, 390.2933654785156, -61.99340057373047, 2485.71533203125, -239.46255493164062, 26.818618774414062, 305.8250732421875, -298.3246765136719, 442.2265930175781, 1467.6376953125, -84.43798065185547, 53.14398956298828, 195.8389129638672, 559.12353515625, 795.220703125, 1442.7408447265625, 1288.10107421875, 115.91105651855469, 1726.802001953125, 78.6292724609375, 1428.4749755859375, 1012.31103515625, 55.75821304321289, 589.1480712890625, 1208.2276611328125, -159.12918090820312, 564.0281982421875, 1478.608642578125, -587.4523315429688, -264.1581726074219, 744.7703247070312, 694.564453125, 802.3864135742188, 1350.7523193359375, 360.68731689453125, 172.77069091796875, 349.4142761230469, -1325.0335693359375, 794.1932373046875, 923.6818237304688, 656.771240234375, 417.24755859375, 266.47808837890625, -730.6643676757812, 1013.4323120117188, 412.6676940917969, 475.4321594238281, 676.6248779296875, 769.5625610351562, 49.6303596496582, 274.9455871582031, 499.2525329589844, 1291.216064453125, -12.30593490600586, 1452.7685546875, 349.1826477050781, 300.4357604980469], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000669.npy"}
|
|
{"epoch": 0.9823788546255506, "step": 670, "batch_size": 64, "mean": 489.8502502441406, "std": 676.6763305664062, "min": -1392.3499755859375, "p10": -300.7358154296875, "median": 355.1375427246094, "p90": 1427.1839965820316, "max": 1819.99658203125, "pos_frac": 0.765625, "sample": [164.29751586914062, 501.4833984375, 247.48565673828125, 1357.416259765625, 451.7076110839844, -193.46017456054688, -202.74624633789062, -111.5252914428711, 1098.7005615234375, 544.1266479492188, 342.17242431640625, -85.94085693359375, -1392.3499755859375, 1819.99658203125, 1268.658935546875, 1751.531494140625, 697.0822143554688, 319.5837707519531, 1217.705078125, 640.0830078125, 118.91871643066406, -551.8788452148438, 1374.1748046875, -320.3846435546875, -562.6629028320312, 368.1026611328125, 1496.9658203125, 53.268009185791016, 1092.9810791015625, -450.6227111816406, 1498.76416015625, 812.7651977539062, -217.17408752441406, 233.23858642578125, 183.30056762695312, 1449.9022216796875, 1180.03173828125, -258.216064453125, 105.87265014648438, 1330.2734375, 423.8980712890625, 1207.1009521484375, 826.507568359375, -301.14483642578125, 1767.54248046875, 879.8587036132812, 297.34771728515625, 337.9877624511719, -299.78143310546875, -319.6204833984375, 14.249664306640625, 1466.77490234375, 176.61175537109375, 262.06658935546875, 1270.7540283203125, 169.58868408203125, 283.6031494140625, 1046.836669921875, -29.0986328125, 841.1270751953125, 448.8236999511719, 559.3953247070312, 45.68023681640625, 600.678466796875], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000670.npy"}
|
|
{"epoch": 0.9838472834067548, "step": 671, "batch_size": 64, "mean": 403.0694885253906, "std": 660.3814086914062, "min": -1592.95166015625, "p10": -218.1745468139648, "median": 378.3988037109375, "p90": 1274.9480224609376, "max": 1948.1341552734375, "pos_frac": 0.75, "sample": [-6.6175689697265625, -1592.95166015625, 458.1715393066406, 83.89578247070312, -131.52203369140625, 702.186279296875, 228.87545776367188, 385.31243896484375, 607.7635498046875, 397.26873779296875, 425.03485107421875, -76.51065826416016, 155.30348205566406, 1095.225341796875, 1027.2119140625, 62.40293884277344, 199.46327209472656, 253.86444091796875, 886.233642578125, 780.4577026367188, 129.84909057617188, 1490.740966796875, -1452.375, 1263.42724609375, 809.0897827148438, -44.17890930175781, 1940.66943359375, 453.3096618652344, 279.123046875, 594.2556762695312, 21.29281997680664, -621.224365234375, -478.08740234375, 505.97137451171875, 1288.4788818359375, -174.70086669921875, 371.48516845703125, 240.47024536132812, 715.063720703125, 1279.885498046875, 1347.950927734375, 415.6697692871094, -230.6935272216797, 91.9177474975586, 811.7938232421875, -18.702835083007812, 302.4859313964844, 933.3397216796875, -334.2149353027344, -65.6666030883789, 797.0186767578125, 428.6191101074219, -96.10980987548828, 255.33148193359375, 1257.9156494140625, 769.6856079101562, -188.96359252929688, -619.4912719726562, 250.87147521972656, 1379.1580810546875, 407.45599365234375, 1948.1341552734375, 1056.130615234375, 343.1943664550781], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000671.npy"}
|
|
{"epoch": 0.9853157121879589, "step": 672, "batch_size": 64, "mean": 410.8834228515625, "std": 752.448486328125, "min": -1288.2999267578125, "p10": -549.0892730712889, "median": 393.7780456542969, "p90": 1259.2124877929691, "max": 2723.106689453125, "pos_frac": 0.703125, "sample": [395.748046875, -103.96798706054688, -294.0145263671875, 210.60635375976562, -141.99342346191406, 86.56593322753906, 847.4580078125, 451.087158203125, 1744.4349365234375, 400.7062072753906, 91.96709442138672, -125.15869903564453, -389.0892639160156, -345.5241394042969, 514.002685546875, 255.32754516601562, -132.05532836914062, -647.1617431640625, 430.04595947265625, 1720.47607421875, 92.90031433105469, 781.376220703125, 19.01313018798828, -122.42709350585938, 1185.3748779296875, -1288.2999267578125, 1134.6527099609375, -926.572021484375, 498.5713195800781, 2723.106689453125, -141.09730529785156, 976.7485961914062, 977.1478271484375, 1114.489501953125, 1177.431396484375, 724.152587890625, 610.9854125976562, -617.6607055664062, 175.81777954101562, 2048.20556640625, -964.80078125, -11.803550720214844, 378.44061279296875, 1288.6287841796875, 881.6281127929688, 671.031494140625, 923.7061157226562, 205.20172119140625, 882.9114379882812, 663.8466186523438, 1562.4140625, 237.41506958007812, -162.46856689453125, 663.8345947265625, 86.479248046875, 811.9414672851562, 1190.574462890625, 113.72386932373047, -80.25444030761719, -763.0836181640625, 590.841064453125, 391.80804443359375, 1403.285888671875, -782.1412353515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000672.npy"}
|
|
{"epoch": 0.986784140969163, "step": 673, "batch_size": 64, "mean": 382.5746765136719, "std": 615.8585205078125, "min": -1285.218017578125, "p10": -278.36803283691404, "median": 384.06573486328125, "p90": 1067.8050292968758, "max": 2976.3876953125, "pos_frac": 0.78125, "sample": [-168.8763885498047, -556.5552368164062, 487.8465881347656, 896.39697265625, 287.489501953125, 493.530517578125, 714.571533203125, 355.5478820800781, 1273.83154296875, 476.0246276855469, -147.24717712402344, 1442.9886474609375, 488.0585021972656, 573.9228515625, 112.60514831542969, 167.3570556640625, -462.04150390625, 834.8611450195312, 1556.48046875, 340.4400634765625, -298.28021240234375, 349.64984130859375, 339.34527587890625, -76.32359313964844, 115.02799987792969, -1285.218017578125, 248.76907348632812, 611.4496459960938, -505.3646545410156, 171.85182189941406, 775.254638671875, 1400.9649658203125, 147.64259338378906, -32.47206497192383, 687.9336547851562, 588.5983276367188, 409.80487060546875, -631.5573120117188, 514.184814453125, 779.3914794921875, 795.323974609375, 85.35831451416016, 236.52828979492188, 679.141357421875, 445.45849609375, 72.53900909423828, 495.4972229003906, -146.4430389404297, 434.6453552246094, 2976.3876953125, 132.73696899414062, 107.09164428710938, 536.3706665039062, 633.546142578125, -665.0072021484375, 554.3048095703125, 448.197021484375, 1141.265625, 118.75726318359375, -231.90628051757812, -43.067344665527344, 617.45556640625, 1224.3853759765625, 358.32659912109375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000673.npy"}
|
|
{"epoch": 0.9882525697503671, "step": 674, "batch_size": 64, "mean": 491.29443359375, "std": 670.5330810546875, "min": -585.914794921875, "p10": -274.142431640625, "median": 424.4930114746094, "p90": 1127.567077636719, "max": 3157.691650390625, "pos_frac": 0.75, "sample": [-103.80712890625, 34.95112228393555, 866.1129760742188, 665.927001953125, 595.86962890625, 759.8340454101562, 34.72062683105469, -75.46272277832031, -504.7997741699219, 1142.0809326171875, 1334.7086181640625, 902.6949462890625, 75.78517150878906, 485.09320068359375, -279.18896484375, 419.74432373046875, -220.42893981933594, 2031.13427734375, 692.0784912109375, 3157.691650390625, 495.68341064453125, 454.4702453613281, -21.337787628173828, 1079.0560302734375, 429.24169921875, -262.3671875, 1081.14013671875, 491.032958984375, 609.2598266601562, -135.598388671875, 1093.701416015625, 2043.3104248046875, -126.74111938476562, 252.09503173828125, 363.38629150390625, 824.5908203125, -298.610595703125, -328.69818115234375, -368.4622497558594, 852.9804077148438, 594.8135986328125, 383.8999938964844, 339.234130859375, 1670.75537109375, 350.0727844238281, 875.7078857421875, 815.489990234375, 953.5886840820312, 557.8973388671875, 52.8646354675293, -585.914794921875, 216.73721313476562, 1856.6981201171875, -340.0126037597656, -3.3733768463134766, 660.3894653320312, 174.22518920898438, 712.335205078125, 396.5019836425781, 635.8399047851562, 151.55886840820312, -23.398359298706055, 158.23379516601562, 295.827880859375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000674.npy"}
|
|
{"epoch": 0.9897209985315712, "step": 675, "batch_size": 64, "mean": 507.7829284667969, "std": 684.7032470703125, "min": -721.4531860351562, "p10": -144.97400817871093, "median": 367.55784606933594, "p90": 1369.0150390625001, "max": 2773.9462890625, "pos_frac": 0.78125, "sample": [275.84600830078125, 1376.721435546875, 570.5471801757812, 122.56605529785156, 40.80455780029297, 130.53907775878906, 819.9605102539062, -60.765045166015625, 587.3864135742188, 295.8379821777344, 823.6610717773438, -197.562255859375, 610.9923706054688, 1046.704345703125, -145.1607666015625, -34.87342834472656, 167.44061279296875, 1351.033447265625, 566.4815673828125, 484.4682922363281, 757.88037109375, -42.085731506347656, 2268.2216796875, 794.6336669921875, 38.22838592529297, 408.3672180175781, 8.899776458740234, 735.0967407226562, 568.273193359375, 489.73590087890625, 370.76580810546875, 1613.08056640625, 358.77203369140625, 1764.67431640625, -144.53823852539062, 1317.924560546875, 2773.9462890625, -242.2377471923828, 971.0690307617188, 2072.23681640625, 200.9665069580078, 81.65145874023438, -36.424110412597656, 119.94927978515625, 295.9423828125, -37.16363525390625, -721.4531860351562, 205.57638549804688, 655.4408569335938, 134.41323852539062, 143.7161407470703, 1562.703857421875, 743.5751953125, -563.710693359375, -614.6024169921875, 401.27301025390625, 79.6268310546875, 722.6748046875, -53.313201904296875, 1343.50244140625, 364.3498840332031, 1130.568115234375, 1059.6484375, -436.3777160644531], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000675.npy"}
|
|
{"epoch": 0.9911894273127754, "step": 676, "batch_size": 64, "mean": 473.14080810546875, "std": 591.5277099609375, "min": -881.29736328125, "p10": -125.45591659545896, "median": 405.97882080078125, "p90": 1348.4051757812501, "max": 2309.787353515625, "pos_frac": 0.75, "sample": [12.081199645996094, 1600.6839599609375, 856.144775390625, 890.0971069335938, -24.715927124023438, -95.0938491821289, 417.3661193847656, 769.2666015625, 33.40665054321289, -881.29736328125, 500.2962646484375, 36.84002685546875, -26.817031860351562, 1264.7967529296875, 1741.3074951171875, 2309.787353515625, -74.14209747314453, -192.4457244873047, -56.17271423339844, -195.20553588867188, -33.764183044433594, 507.9552001953125, 610.9110107421875, -77.37026977539062, -138.46823120117188, 751.22119140625, 100.68170166015625, 1266.619384765625, 394.5915222167969, 1564.14599609375, -36.591949462890625, 691.3763427734375, 265.2863464355469, 713.468994140625, 157.6512451171875, 794.1083374023438, 1489.0048828125, 432.961669921875, 508.7579345703125, 1357.17919921875, 735.5420532226562, -12.999168395996094, 168.30918884277344, -351.82061767578125, 1352.855224609375, 1045.8359375, 65.12466430664062, 873.0034790039062, 520.6793212890625, 350.17596435546875, 794.6827392578125, 482.000732421875, 250.1738739013672, 388.748291015625, 47.359474182128906, 306.1488037109375, 876.2926635742188, -203.265625, 1338.021728515625, 132.1887969970703, 456.4854736328125, 190.71820068359375, -232.9993438720703, 501.83868408203125], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000676.npy"}
|
|
{"epoch": 0.9926578560939795, "step": 677, "batch_size": 64, "mean": 454.3424377441406, "std": 740.900634765625, "min": -888.8284912109375, "p10": -234.83784637451168, "median": 357.67608642578125, "p90": 1206.7333007812501, "max": 3911.780029296875, "pos_frac": 0.75, "sample": [227.358642578125, 2733.983642578125, 619.7821655273438, -37.871620178222656, 515.0523681640625, -250.0948486328125, -133.03729248046875, -338.1531677246094, 423.0880126953125, -96.79296875, 516.1799926757812, 539.905029296875, 37.82206726074219, 1248.915771484375, 312.00616455078125, 877.1361694335938, 758.9479370117188, 27.867996215820312, 278.23895263671875, 193.4103240966797, 1189.746337890625, 274.5358581542969, 1156.944580078125, -147.3566436767578, 347.38482666015625, 1300.04736328125, 48.55079650878906, 535.281982421875, 357.99395751953125, 1170.6090087890625, 357.35821533203125, -70.06588745117188, -199.23817443847656, 220.15924072265625, 267.4933776855469, -678.33642578125, 951.6279296875, 360.28399658203125, 3911.780029296875, 127.73388671875, 104.13370513916016, 510.73529052734375, 843.1228637695312, 707.8933715820312, -144.15408325195312, 1214.013427734375, 424.0644836425781, 233.49813842773438, 735.6649169921875, 129.64466857910156, 1331.950927734375, -40.51301956176758, 748.5584716796875, 390.9254455566406, 566.13330078125, -397.4954528808594, -888.8284912109375, 1752.10888671875, 538.6760864257812, 677.7469482421875, 752.90576171875, -299.64373779296875, -738.741455078125, -10.763275146484375], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000677.npy"}
|
|
{"epoch": 0.9941262848751835, "step": 678, "batch_size": 64, "mean": 533.0916748046875, "std": 723.9562377929688, "min": -1719.78564453125, "p10": -146.03075408935544, "median": 431.1685333251953, "p90": 1159.4658447265626, "max": 2627.169677734375, "pos_frac": 0.8125, "sample": [268.0325622558594, 935.142578125, 540.603271484375, 620.8671264648438, 513.5473022460938, -44.203521728515625, 551.2800903320312, -1719.78564453125, 120.21722412109375, -254.10696411132812, -89.17951965332031, 1174.6121826171875, 71.95487976074219, 1124.1243896484375, -52.176719665527344, 568.0963134765625, 881.7728881835938, -302.06201171875, 873.6029663085938, 877.5911254882812, 1051.3853759765625, 1082.251708984375, 15.596214294433594, 2233.02880859375, 1092.927734375, 2627.169677734375, 63.28874206542969, 283.633544921875, -13.404376983642578, 1118.8629150390625, -494.4621276855469, 176.26278686523438, 257.14141845703125, 785.7849731445312, 276.4571838378906, 1036.172607421875, 187.32064819335938, 1100.8282470703125, 293.56341552734375, 892.5184936523438, -155.98558044433594, 823.5193481445312, 1275.651123046875, 1641.9202880859375, 338.1663818359375, 259.3545837402344, 2300.935791015625, 2360.7724609375, 53.599788665771484, 572.7520141601562, 279.1295166015625, -659.1109619140625, 378.55035400390625, 23.467056274414062, 57.32592010498047, 105.43965148925781, 625.3677368164062, 1084.4849853515625, 602.8096313476562, 482.370361328125, 379.9667053222656, -122.80282592773438, 865.665283203125, -181.74252319335938], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000678.npy"}
|
|
{"epoch": 0.9955947136563876, "step": 679, "batch_size": 64, "mean": 382.27972412109375, "std": 605.42822265625, "min": -721.9423828125, "p10": -290.47289886474607, "median": 288.74595642089844, "p90": 1303.9257202148438, "max": 2081.857666015625, "pos_frac": 0.71875, "sample": [576.9481811523438, 60.07221221923828, 1304.033203125, 2081.857666015625, 405.5057067871094, 713.7910766601562, 850.9905395507812, 461.0498352050781, 1303.6749267578125, 350.2202453613281, 462.9739990234375, 445.6465148925781, 421.3553161621094, 265.99810791015625, 994.50732421875, -418.6412658691406, -160.72702026367188, 123.37472534179688, 193.1436767578125, 940.5819091796875, 628.3192138671875, 1550.734619140625, -105.00679016113281, 104.72634887695312, 162.68836975097656, -320.4270324707031, 321.12890625, 1531.2288818359375, -437.1761169433594, 514.226806640625, 838.5325927734375, 64.47564697265625, -68.7821044921875, -32.32598114013672, 941.73486328125, 380.033203125, 1523.31103515625, 146.35865783691406, -198.78350830078125, 16.831275939941406, 183.47598266601562, -89.31379699707031, 474.9818420410156, -11.707000732421875, -307.0335388183594, 1477.572265625, 311.4938049316406, -159.60733032226562, -721.9423828125, -182.45046997070312, 875.4227294921875, -547.3265991210938, 592.132080078125, -251.83140563964844, 473.66302490234375, 262.6438903808594, -77.91958618164062, 192.17138671875, 786.8731689453125, 1927.356201171875, 204.11148071289062, 630.18896484375, -641.869873046875, 126.63379669189453], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000679.npy"}
|
|
{"epoch": 0.9970631424375918, "step": 680, "batch_size": 64, "mean": 508.6307678222656, "std": 753.7247924804688, "min": -2368.30126953125, "p10": -207.59720916748043, "median": 377.6241455078125, "p90": 1590.0399047851568, "max": 2350.96435546875, "pos_frac": 0.828125, "sample": [1296.3240966796875, 351.5274963378906, 422.98858642578125, 933.2130126953125, 130.03208923339844, 1147.36181640625, 1782.72705078125, 579.7132568359375, 207.71243286132812, 389.7713317871094, 572.5926513671875, 176.1283416748047, 825.5936889648438, -410.1980895996094, 1064.0260009765625, 1476.6328125, 729.8150024414062, -601.4407958984375, -220.77650451660156, 874.3297119140625, 1139.38232421875, 199.80450439453125, -130.1898956298828, 331.3258056640625, -234.71505737304688, 1032.0106201171875, 888.0681762695312, 179.3724822998047, 630.9256591796875, 1713.23193359375, 182.5583953857422, 96.91455841064453, 156.85507202148438, 1747.037353515625, 1087.2939453125, 104.12748718261719, 166.20574951171875, 506.819580078125, 1850.8428955078125, -583.4601440429688, 123.61871337890625, 365.4769592285156, 485.53314208984375, 595.07958984375, 152.3799285888672, -2368.30126953125, 1762.2344970703125, -176.84552001953125, -530.1336059570312, 1470.2154541015625, 544.253662109375, 2350.96435546875, 21.014297485351562, 25.910751342773438, 195.80123901367188, 1432.8173828125, 723.747802734375, 1638.6429443359375, -40.15250015258789, 154.63082885742188, -129.75466918945312, 205.66055297851562, 30.554046630859375, 726.5357666015625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000680.npy"}
|
|
{"epoch": 0.9985315712187959, "step": 681, "batch_size": 64, "mean": 399.1241760253906, "std": 649.6876831054688, "min": -895.2818603515625, "p10": -391.7209564208984, "median": 424.4937744140625, "p90": 1222.506518554688, "max": 2216.66552734375, "pos_frac": 0.671875, "sample": [748.1847534179688, -339.79937744140625, 582.5130615234375, 138.11358642578125, 901.71630859375, -895.2818603515625, -6.050323486328125, 464.4501647949219, -442.2178649902344, 267.6610107421875, -274.43316650390625, 385.4816589355469, -449.4920654296875, -429.92401123046875, 1278.3673095703125, 1697.7073974609375, 472.12420654296875, -118.93962860107422, -292.421630859375, 1050.0142822265625, -197.4716796875, -10.385345458984375, 819.551025390625, 463.5058898925781, 1749.572998046875, 666.4053344726562, 205.30496215820312, 1013.956787109375, 908.427490234375, 594.1490478515625, 146.7320556640625, 489.4520263671875, 255.5406494140625, -88.36013793945312, -166.4520263671875, 206.1329803466797, 258.12005615234375, -714.7716064453125, 487.2286071777344, -116.41911315917969, -371.00103759765625, 2216.66552734375, -672.9071044921875, 909.4166870117188, -115.46639251708984, 500.8175048828125, -400.6009216308594, 208.71873474121094, 59.83058166503906, -238.00555419921875, 947.5053100585938, -334.4187316894531, 942.0847778320312, 578.6897583007812, 1340.541748046875, 528.7022094726562, 1092.1646728515625, 1035.6976318359375, 1070.17138671875, 711.170166015625, 237.62315368652344, 1305.84033203125, 1425.8939208984375, 856.818603515625], "npy": "/workspace/dynamic-dpo-v4/outputs/llama-3-8b-base-new-dpo-hh-helpful-4xh200-batch-64-s_star-0.4-eta-0.1-q_t-0.45-beta-0p8-20260429-085449/margin_logs/step_0000681.npy"}
|